Model

library(DiagrammeR) 
# Nodes
 #node [shape = box]
 # S [label = 'Matched\n(S=1)',fontsize=7]
 # C [label = 'Not censored\n(C=0)',fontsize=7]
gr1<-
DiagrammeR::grViz("
digraph causal {

# Nodes
  node [shape = plaintext]
  a [label = 'Observed\nConfounders\n(Z)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Residential\nPrograms\n(X)',fontsize=10]

# Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB;
  
  b -> c 
  b -> a 
  a -> c  

  d -> c [minlen=1]
  d -> a [minlen=1]
  
 # a -> S #[minlen=1]
 # Z -> S #[minlen=1]
  
#  a -> C #[minlen=3]
#  Z -> C #[minlen=3]
  { rank = same; b; a; c }
# { rank = same; S; C }
  { rankdir = LR; a; d }

# Graph
  graph [overlap = true]
}")
gr1

Figure 1. Directed Acyclic Graph

#  {rank=same ; A -> B -> C -> D};
#       {rank=same ;           F -> E[dir=back]};
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3733703/
#Cohort matching on a variable associated with both outcome and censoring
#Cohort matching on a confounder. We let A denote an exposure, Y denote an outcome, and C denote a confounder and matching variable. The variable S indicates whether an individual in the source population is selected for the matched study (1: selected, 0: not selected). See Section 2-7 for details.
#https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7064555/
gr2<-
DiagrammeR::grViz("
digraph causal {

  # Nodes
  node [shape = plaintext]
  a [label = 'Residential\nPrograms\n(X)',fontsize=10]
  b [label = 'Unobserved\nConfounders\n(U)',fontsize=10]
  c [label = 'Early\nDrop-out\n(Y)',fontsize=10]
  d [label = 'Observed\nConfounders\n(Z)',fontsize=10]

  # Edges
  edge [color = black,
        arrowhead = vee]
  rankdir = TB
  a -> c [minlen=3]
  d -> a [minlen=3]
  d -> c [minlen=9]
  
  b -> a [minlen=1]
  b -> c
  
{ rank = same; c; d }
#{ rank = same; b; d }
  rankdir = TB
{ rank = same; d; c } #Ver si lo saco, creo que da problemas
  
  # Graph
  graph [overlap = true]
}")#LR

Balance

We selected treatments at baseline for each user, leaving 85,048 observations. Then, we distinguished between residential 12,706 and ambulatory (72,267) treatments. We imputed cases that did not have a defined treatment assigned 75.


We selected the following variables of interest:

  • ā€œStarting Substanceā€ (sus_ini_mvv)
  • ā€œMarital Statusā€ (estado_conyugal_2)
  • ā€œEducational Attainmentā€ (escolaridad_rec)
  • ā€œAge of Onset of Drug Useā€ (edad_ini_cons)
  • ā€œFrequency of use of primary drugā€ (freq_cons_sus_prin)
  • ā€œMotive of Admission to Treatmentā€ (origen_ingreso_mod)
  • ā€œPsychiatric co-morbidityā€ (dg_cie_10_rec)
  • ā€œDrug Dependenceā€ (dg_trs_cons_sus_or)
  • ā€œChilean Region of the Centerā€ (nombre_region)
  • ā€œType of Center (Public)ā€ (tipo_centro_pub)
  • ā€œSexā€ (sexo_2)
  • ā€œAge at Admission to Treatmentā€ (edad_al_ing)
  • ā€œDate of Admission to Treatmentā€ (fech_ing_num)
  • ā€œEvaluation of the Therapeutic Processā€ (*) (evaluacindelprocesoteraputico)
  • ā€œEarly Dropout (Against Staff Advice)ā€ (abandono_temprano_rec) (Y)
  • ā€œResidential Type of Planā€ (tipo_de_plan_res) (Z)


library(compareGroups)

match.on_tot <- c("row", "hash_key","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","abandono_temprano_rec","tipo_de_plan_res","duplicates_filtered","dg_trs_cons_sus_or","evaluacindelprocesoteraputico")
#dg_trs_cons_sus_or

CONS_C1_df_dup_SEP_2020_match<-
  CONS_C1_df_dup_SEP_2020 %>% 
  dplyr::filter(dup==1) %>% #, tipo_de_plan_2 %in% c("PG-PR","M-PR","PG-PAI","M-PAI","PG-PAB","M-PAB")
  dplyr::mutate(tipo_de_plan_res=dplyr::case_when(grepl("PR",as.character(tipo_de_plan_2))~1,
                                                  grepl("PAI",as.character(tipo_de_plan_2))~0,
                                                  grepl("PAB",as.character(tipo_de_plan_2))~0,
                                                  TRUE~NA_real_)) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(tipo_de_plan_res)) %>% 
  dplyr::mutate(abandono_temprano_rec=factor(if_else(as.character(motivodeegreso_mod_imp)=="Early Drop-out",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(if_else(as.character(dg_trs_cons_sus_or)=="Drug dependence",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(tipo_centro_pub=factor(if_else(as.character(tipo_centro)=="Public",TRUE,FALSE,NA))) %>% 
  dplyr::mutate(condicion_ocupacional_corr=factor(condicion_ocupacional_corr),cat_ocupacional_corr=factor(cat_ocupacional_corr)) %>% 
  dplyr::mutate(dg_trs_fis_rec=factor(dplyr::case_when(as.character(diagnostico_trs_fisico)=="En estudio"~"Diagnosis unknown (under study)",as.character(diagnostico_trs_fisico)=="Sin trastorno"~'Without physical comorbidity',cnt_diagnostico_trs_fisico>0 ~'With physical comorbidity',
                                             TRUE~NA_character_)))%>%
    dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered=T,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%   
dplyr::mutate(freq_cons_sus_prin=parse_factor(as.character(freq_cons_sus_prin),levels=c('Did not use', 'Less than 1 day a week','2 to 3 days a week','4 to 6 days a week','1 day a week or more','Daily'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=dplyr::case_when(grepl("1",as.character(evaluacindelprocesoteraputico))~'1-High Achievement',grepl("2",as.character(evaluacindelprocesoteraputico))~'2-Medium Achievement',grepl("3",as.character(evaluacindelprocesoteraputico))~'3-Minimum Achievement', TRUE~as.character(evaluacindelprocesoteraputico))) %>% 
  dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::select_(.dots = match.on_tot) %>% 
  dplyr::mutate(more_one_treat=factor(ifelse(duplicates_filtered>1,1,0))) %>% 
  data.table::data.table()
## Warning: `select_()` was deprecated in dplyr 0.7.0.
## Please use `select()` instead.
#CONS_C1_df_dup_SEP_2020_match %>% 
  #dplyr::group_by(dg_trs_fis) %>% dplyr::summarise(q1=quantile(dias_treat_imp_sin_na,.25),q2=quantile(dias_treat_imp_sin_na,.5),q3=quantile(dias_treat_imp_sin_na,.75)) ---> las distribuciones por dƭas de tratamiento de las categorƭas de respuesta tienden a ser bastante similares, aunquequienes tienen una comorbiliad fƭsica definida tienen mƔs tiempo en el estudio.
invisible("La diferencia en dƭas de tratamiento entre las categorƭas de enfermedad psiquiƔtrica, indica que quienes se encuentran en estudio tienen muchos menos dƭas en tratamiento que quienes no tienen una comorbilidad o quienes tienen una definida. No es lo mismo con el caso de la enfermedad fƭsica, en donde tienden a ser bastante similares")

invisible("Decidí no incluir diagnóstico de enferemedad física, porque hay algunas condiciones que son crónicas o que pueden serlo, y que no tengo cómo validarlas a lo largo del tratamiento")
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:

attr(CONS_C1_df_dup_SEP_2020_match$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match$origen_ingreso_mod,"label")<-"Motive of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$dg_cie_10_rec,"label")<-"Psychiatric co-morbidity"
attr(CONS_C1_df_dup_SEP_2020_match$nombre_region,"label")<-"Chilean Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_centro_pub,"label")<-"Type of Center (Public)"
attr(CONS_C1_df_dup_SEP_2020_match$sexo_2,"label")<-"Sex"
attr(CONS_C1_df_dup_SEP_2020_match$edad_al_ing,"label")<-"Age at Admission"
attr(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,"label")<-"Date of Admission to Treatment"
attr(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,"label")<-"Early Dropout"
attr(CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res,"label")<-"Residential Type of Plan"
attr(CONS_C1_df_dup_SEP_2020_match$duplicates_filtered,"label")<-"No. of Treatments in the Database"
attr(CONS_C1_df_dup_SEP_2020_match$dg_trs_cons_sus_or,"label")<-"Drug Dependence"
attr(CONS_C1_df_dup_SEP_2020_match$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"

knitr::opts_chunk$set(echo = FALSE, warning=FALSE, message=FALSE)

table1_all <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ duplicates_filtered+ dg_trs_cons_sus_or+ evaluacindelprocesoteraputico, method= c(
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            duplicates_filtered=3,
                                            evaluacindelprocesoteraputico=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T)
)
table1_more_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico, method= c(
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            evaluacindelprocesoteraputico=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T,
                       subset= more_one_treat==1)
)
table1_only_one <- suppressWarnings(compareGroups(tipo_de_plan_res ~ sus_ini_mod_mvv+ estado_conyugal_2+ escolaridad_rec+ edad_ini_cons+ freq_cons_sus_prin+ origen_ingreso_mod+ dg_cie_10_rec+ dg_trs_cons_sus_or+ nombre_region+ tipo_centro_pub+ sexo_2+ dg_trs_cons_sus_or+ edad_al_ing+ fech_ing_num+ abandono_temprano_rec+ evaluacindelprocesoteraputico, method= c(
                                            sus_ini_mod_mvv=3,
                                            estado_conyugal_2=3,
                                            escolaridad_rec=3,
                                            edad_ini_cons=3,
                                            freq_cons_sus_prin=3,
                                            origen_ingreso_mod=3,
                                            dg_cie_10_rec=3,
                                            dg_trs_cons_sus_or=3,
                                            nombre_region=3,
                                            tipo_centro_pub=3,
                                            sexo_2=3,
                                            dg_trs_cons_sus_or=3,
                                            edad_al_ing=2,
                                            fech_ing_num=2,
                                            abandono_temprano_rec=3,
                                            evaluacindelprocesoteraputico=3),
                       data = CONS_C1_df_dup_SEP_2020_match,
                       include.miss = T,
                       var.equal=T,
                       subset= more_one_treat==0)
)
 #Possible values are: 1 - for analysis as "normal-distributed"; 2 - forces analysis as "continuous non-normal"; 3 - forces analysis as "categorical"; and 4 - NA, which performs a Shapiro-Wilks test to decide between normal or non-normal. 

restab1_all <- createTable(table1_all, show.p.overall = T)
restab1_more_one <- createTable(table1_more_one, show.p.overall = T)
restab1_only_one <- createTable(table1_only_one, show.p.overall = T)

pvals1 <- getResults(table1_all)
#p.adjust(pvals, method = "BH")
 export2md(restab1_all, size=10, first.strip=T, hide.no="no", position="center",
           format="html",caption= "Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019",col.names=c("Variables","Residential", "Ambulatory", "p-value"))%>%
  kableExtra::add_footnote(c("Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;", "Categorical variables are presented as number (%)"), notation = "none")%>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 1. Summary descriptives at baseline, between Users with Residential and Ambulatory Treatments from 2010-2019
Variables Residential Ambulatory p-value
N=72267 N=12706
Starting Substance: 0.000
Alcohol 41507 (57.4%) 5080 (40.0%)
Cocaine hydrochloride 2682 (3.71%) 477 (3.75%)
Marijuana 18412 (25.5%) 4556 (35.9%)
Other 1669 (2.31%) 318 (2.50%)
Cocaine paste 2767 (3.83%) 1086 (8.55%)
ā€˜Missing’ 5230 (7.24%) 1189 (9.36%)
Marital Status: <0.001
Married/Shared living arrangements 26185 (36.2%) 2910 (22.9%)
Separated/Divorced 7721 (10.7%) 1320 (10.4%)
Single 37343 (51.7%) 8328 (65.5%)
Widower 869 (1.20%) 133 (1.05%)
ā€˜Missing’ 149 (0.21%) 15 (0.12%)
Educational Attainment: <0.001
3-Completed primary school or less 20062 (27.8%) 3862 (30.4%)
2-Completed high school or less 39565 (54.7%) 7044 (55.4%)
1-More than high school 12279 (17.0%) 1777 (14.0%)
ā€˜Missing’ 361 (0.50%) 23 (0.18%)
Frequency of use of primary drug: 0.000
Did not use 1095 (1.52%) 85 (0.67%)
Less than 1 day a week 2862 (3.96%) 133 (1.05%)
2 to 3 days a week 22372 (31.0%) 1329 (10.5%)
4 to 6 days a week 12258 (17.0%) 1654 (13.0%)
1 day a week or more 5335 (7.38%) 272 (2.14%)
Daily 27938 (38.7%) 9219 (72.6%)
ā€˜Missing’ 407 (0.56%) 14 (0.11%)
Motive of Admission to Treatment: 0.000
Spontaneous 33720 (46.7%) 4273 (33.6%)
Assisted Referral 4950 (6.85%) 3013 (23.7%)
Other 3766 (5.21%) 740 (5.82%)
Justice Sector 7159 (9.91%) 812 (6.39%)
Health Sector 22672 (31.4%) 3868 (30.4%)
Psychiatric co-morbidity: <0.001
Without psychiatric comorbidity 29070 (40.2%) 3245 (25.5%)
Diagnosis unknown (under study) 13310 (18.4%) 2771 (21.8%)
With psychiatric comorbidity 29887 (41.4%) 6690 (52.7%)
Type of Center (Public): 0.000
FALSE 14964 (20.7%) 9066 (71.4%)
TRUE 57300 (79.3%) 3623 (28.5%)
ā€˜Missing’ 3 (0.00%) 17 (0.13%)
Sex: <0.001
Men 54806 (75.8%) 8761 (69.0%)
Women 17461 (24.2%) 3945 (31.0%)
Drug Dependence: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
ā€˜Missing’ 1 (0.00%) 0 (0.00%)
Age at Admission 34.5 [27.6;43.5] 32.6 [26.3;40.9] <0.001
Date of Admission to Treatment 16577 [15730;17359] 16154 [15342;17023] <0.001
Early Dropout: <0.001
FALSE 61074 (84.5%) 10201 (80.3%)
TRUE 11190 (15.5%) 2499 (19.7%)
ā€˜Missing’ 3 (0.00%) 6 (0.05%)
No.Ā of Treatments in the Database: .
1 58708 (81.2%) 8533 (67.2%)
2 10087 (14.0%) 2804 (22.1%)
3 2471 (3.42%) 927 (7.30%)
4 714 (0.99%) 295 (2.32%)
5 192 (0.27%) 94 (0.74%)
6 67 (0.09%) 36 (0.28%)
7 23 (0.03%) 11 (0.09%)
8 4 (0.01%) 6 (0.05%)
10 1 (0.00%) 0 (0.00%)
Drug Dependence: 0.000
FALSE 22150 (30.7%) 1049 (8.26%)
TRUE 50116 (69.3%) 11657 (91.7%)
ā€˜Missing’ 1 (0.00%) 0 (0.00%)
Evaluation of the Therapeutic Process: <0.001
1-High Achievement 14081 (19.5%) 2831 (22.3%)
2-Medium Achievement 21728 (30.1%) 4237 (33.3%)
3-Minimum Achievement 31549 (43.7%) 5302 (41.7%)
ā€˜Missing’ 4909 (6.79%) 336 (2.64%)
Note. Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


Of the 85,048 users, we selected 85,048 that fulfilled the conditions stated above (100%).


Additionally, we generated a correlation plot to get an overview of heterogeneous correlations between the different variables.


require(polycor)
#Corresponde a la apreciación clínica que hace el equipo o profesional tratante, la persona en tratamiento y su familia, del nivel alcanzado de logro de los objetivos terapéuticos planteados al inicio del proceso y descritos en el plan de tratamiento personalizado. Los criterios incluyen la evaluación del estado clínico y psicosocial al momento del egreso y una apreciación pronostica del equipo tratante.

#Computes a heterogenous correlation matrix, consisting of Pearson product-moment correlations between numeric variables, polyserial correlations between numeric and ordinal variables, and polychoric correlations between 
tiempo_antes_hetcor<-Sys.time()
hetcor_mat<-hetcor(CONS_C1_df_dup_SEP_2020_match[,-c("hash_key","row","more_one_treat","duplicates_filtered")], ML = T, std.err =T, use="pairwise.complete.obs", bins=3, pd=TRUE)
tiempo_despues_hetcor<-Sys.time()
tiempo_hetcor<-tiempo_despues_hetcor-tiempo_antes_hetcor

attr(hetcor_mat$correlations,"dimnames")[[2]][1]<-"Starting Substance"
attr(hetcor_mat$correlations,"dimnames")[[2]][2]<-"Marital Status"
attr(hetcor_mat$correlations,"dimnames")[[2]][3]<-"Educational Attainment"
attr(hetcor_mat$correlations,"dimnames")[[2]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$correlations,"dimnames")[[2]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$correlations,"dimnames")[[2]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$correlations,"dimnames")[[2]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$correlations,"dimnames")[[2]][8]<-"Physical comorbidity"
attr(hetcor_mat$correlations,"dimnames")[[2]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$correlations,"dimnames")[[2]][9]<-"Type of Center (Public)"
attr(hetcor_mat$correlations,"dimnames")[[2]][10]<-"Sex"
attr(hetcor_mat$correlations,"dimnames")[[2]][11]<-"Age at Admission"
attr(hetcor_mat$correlations,"dimnames")[[2]][12]<-"Date of Admission"
attr(hetcor_mat$correlations,"dimnames")[[2]][13]<-"Early Drop out"
attr(hetcor_mat$correlations,"dimnames")[[2]][14]<-"Residential Treatment"
attr(hetcor_mat$correlations,"dimnames")[[2]][15]<-"Drug Dependence"
attr(hetcor_mat$correlations,"dimnames")[[2]][16]<-"Evaluation of the Therapeutic Process"

attr(hetcor_mat$correlations,"dimnames")[[1]][1]<-"Starting Substance"
attr(hetcor_mat$correlations,"dimnames")[[1]][2]<-"Marital Status"
attr(hetcor_mat$correlations,"dimnames")[[1]][3]<-"Educational Attainment"
attr(hetcor_mat$correlations,"dimnames")[[1]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$correlations,"dimnames")[[1]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$correlations,"dimnames")[[1]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$correlations,"dimnames")[[1]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$correlations,"dimnames")[[1]][8]<-"Physical comorbidity"
attr(hetcor_mat$correlations,"dimnames")[[1]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$correlations,"dimnames")[[1]][9]<-"Type of Center (Public)"
attr(hetcor_mat$correlations,"dimnames")[[1]][10]<-"Sex"
attr(hetcor_mat$correlations,"dimnames")[[1]][11]<-"Age at Admission"
attr(hetcor_mat$correlations,"dimnames")[[1]][12]<-"Date of Admission"
attr(hetcor_mat$correlations,"dimnames")[[1]][13]<-"Early Drop out"
attr(hetcor_mat$correlations,"dimnames")[[1]][14]<-"Residential Treatment"
attr(hetcor_mat$correlations,"dimnames")[[1]][15]<-"Drug Dependence"
attr(hetcor_mat$correlations,"dimnames")[[1]][16]<-"Evaluation of the Therapeutic Process"

attr(hetcor_mat$tests,"dimnames")[[2]][1]<-"Starting Substance"
attr(hetcor_mat$tests,"dimnames")[[2]][2]<-"Marital Status"
attr(hetcor_mat$tests,"dimnames")[[2]][3]<-"Educational Attainment"
attr(hetcor_mat$tests,"dimnames")[[2]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$tests,"dimnames")[[2]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$tests,"dimnames")[[2]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$tests,"dimnames")[[2]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$tests,"dimnames")[[2]][8]<-"Physical comorbidity"
attr(hetcor_mat$tests,"dimnames")[[2]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$tests,"dimnames")[[2]][9]<-"Type of Center (Public)"
attr(hetcor_mat$tests,"dimnames")[[2]][10]<-"Sex"
attr(hetcor_mat$tests,"dimnames")[[2]][11]<-"Age at Admission"
attr(hetcor_mat$tests,"dimnames")[[2]][12]<-"Date of Admission"
attr(hetcor_mat$tests,"dimnames")[[2]][13]<-"Early Drop out"
attr(hetcor_mat$tests,"dimnames")[[2]][14]<-"Residential Treatment"
attr(hetcor_mat$tests,"dimnames")[[2]][15]<-"Drug Dependence"
attr(hetcor_mat$tests,"dimnames")[[2]][16]<-"Evaluation of the Therapeutic Process"

attr(hetcor_mat$tests,"dimnames")[[1]][1]<-"Starting Substance"
attr(hetcor_mat$tests,"dimnames")[[1]][2]<-"Marital Status"
attr(hetcor_mat$tests,"dimnames")[[1]][3]<-"Educational Attainment"
attr(hetcor_mat$tests,"dimnames")[[1]][4]<-"Age of Onset of Drug Use"
attr(hetcor_mat$tests,"dimnames")[[1]][5]<-"Frequency of use of primary drug"
attr(hetcor_mat$tests,"dimnames")[[1]][6]<-"Motive of Admission to Treatment"
attr(hetcor_mat$tests,"dimnames")[[1]][7]<-"Psychiatric comorbidity"
#attr(hetcor_mat$tests,"dimnames")[[1]][8]<-"Physical comorbidity"
attr(hetcor_mat$tests,"dimnames")[[1]][8]<-"Chilean Region of the Center"
attr(hetcor_mat$tests,"dimnames")[[1]][9]<-"Type of Center (Public)"
attr(hetcor_mat$tests,"dimnames")[[1]][10]<-"Sex"
attr(hetcor_mat$tests,"dimnames")[[1]][11]<-"Age at Admission"
attr(hetcor_mat$tests,"dimnames")[[1]][12]<-"Date of Admission"
attr(hetcor_mat$tests,"dimnames")[[1]][13]<-"Early Drop out"
attr(hetcor_mat$tests,"dimnames")[[1]][14]<-"Residential Treatment"
attr(hetcor_mat$tests,"dimnames")[[1]][15]<-"Drug Dependence"
attr(hetcor_mat$tests,"dimnames")[[1]][16]<-"Evaluation of the Therapeutic Process"

hetcor_mat$tests[is.na(hetcor_mat$tests)]<-1

ggcorrplot<-
ggcorrplot::ggcorrplot(hetcor_mat$correlations,
           ggtheme = ggplot2::theme_void,
           insig = "blank",
           pch=1,
           pch.cex=3,
           tl.srt = 45, 
           #pch="ns",
            p.mat = hetcor_mat$tests, #  replacement has 144 rows, data has 169
            #type = "lower",
           colors = c("#6D9EC1", "white", "#E46726"), 
           tl.cex=8,
           lab=F)+
  #scale_x_discrete(labels = var_lbls_p345, drop = F) +
  #scale_y_discrete(labels = var_lbls_p345, drop = F) +
  theme(axis.text.x = element_blank())+
  #theme(axis.text.y = element_text(size=7.5,color ="black", hjust = 1))+
  theme(axis.text.y = element_blank())+
  theme(legend.position="bottom")

ggplotly(ggcorrplot, height = 800, width=800)%>% 
  layout(xaxis= list(showticklabels = FALSE)) %>% 
 layout(annotations = 
 list(x = .1, y = -0.031, text = "", 
      showarrow = F, xref='paper', yref='paper', 
      #xanchor='center', yanchor='auto', xshift=0, yshift=-0,
      font=list(size=11, color="darkblue"))
 )

Figure 2. Heterogeneous Correlation Matrix of Variables of Interest


Imputation


We generated a plot to see all the missing values in the sample.


Figure 3. Bar plot of Porcentaje of Missing Values per Variables at Basline






From the figure above, we could see that the starting substance (sus_ini_mvv), the onset of drug use (edad_ini_cons) and the evaluation of the therapeutic process (evaluacindelprocesoteraputico) had around 6% of missing data. These values should be imputed. We first focused on the age of onset of drug use. It is important to consider that the evaluation of the therapeutic process could be distorted due to censoring (many users did not finish their treatment, and did not have this evaluation in the study period).



Age at Admission

We started looking over the missing values in the age at admission (n8). Since there were not cases with more than one treatment that had not an age of admission, we did not have to impute taking into account serial dependencies in the dates of treatment.

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission

Figure 5. Density Estimation of Distributions of Age at Admission & Imputed Age at Admission


As seen in the Figure above, distributions seem to differ. However, considering the low amount of missing values in this variable, we proceeded with the imputation with the mean, despite the differences found. The imputed values must not be greater than the age of onset of drug use and may not be lower than 16 years old. Values lower than this age may be considered less likely to receive treatment for adult population, so it would be most probably incorrect that they would be in this database.


## [1] "Users that had more than one treatment with no date of admission:0"


Age of Onset of Drug Use

Another variable worth imputing is the Age of Onset of Drug Use (n= 6,549).


Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones

Figure 6. Density Estimation of Distributions of Age Of Onset of Drug Use & Imputed Ones


Based on the figure above, the age of onset of drug use was similar between the imputed values and the observed. However, we followed the rules stated in Duplicates process (link). There were three logical conditions to fulfill in order to replace adequately these values in the database: the age of onset must not be greater than the age of onset of drug use in the primary substance at admission (1), may not be greater than the age of admission to treatment (2), and the age of onset of drug use must be greater than 4 years old. Then, we selected the minimum value of age of onset of drug use among the imputed, because one user could not have more than one age of onset of drug use.


## [1] "Number of users that had more than one different age of onset of drug use before replacement: 0"

Figure 7. Bar plot of Percentage of Incorrect Imputed Values per Imputation Sample

## [1] "Cases with more than missing one age of onset: 515"
## [1] "Number of rows with values that did not fulfilled the conditions: 0"
## [1] "Number of rows with values that did not fulfilled the conditions after replacement with the minimum by users: 0"
## [1] "Number of users that had different age of onset of drug use after replacement: 0"



There were 0 cases of imputed ages of onset of drug use that did not fulfilled the conditions necessary to replace the missing values with the imputed ones.


Starting Substance

Then we selected the most vulnerable value among the candidates of imputations of the starting substance (First, Cocaine paste, Cocaine hydrochloride or snort cocaine, Marijuana, Alcohol, and Other).


# Ver distintos valores propuestos para sustancia de inciio
sus_ini_mod_mvv_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$sus_ini_mod_mvv,
       amelia_fit$imputations$imp2$sus_ini_mod_mvv,
       amelia_fit$imputations$imp3$sus_ini_mod_mvv,
       amelia_fit$imputations$imp4$sus_ini_mod_mvv,
       amelia_fit$imputations$imp5$sus_ini_mod_mvv,
       amelia_fit$imputations$imp6$sus_ini_mod_mvv,
       amelia_fit$imputations$imp7$sus_ini_mod_mvv,
       amelia_fit$imputations$imp8$sus_ini_mod_mvv,
       amelia_fit$imputations$imp9$sus_ini_mod_mvv,
       amelia_fit$imputations$imp10$sus_ini_mod_mvv,
       amelia_fit$imputations$imp11$sus_ini_mod_mvv,
       amelia_fit$imputations$imp12$sus_ini_mod_mvv,
       amelia_fit$imputations$imp13$sus_ini_mod_mvv,
       amelia_fit$imputations$imp14$sus_ini_mod_mvv,
       amelia_fit$imputations$imp15$sus_ini_mod_mvv,
       amelia_fit$imputations$imp16$sus_ini_mod_mvv,
       amelia_fit$imputations$imp17$sus_ini_mod_mvv,
       amelia_fit$imputations$imp18$sus_ini_mod_mvv,
       amelia_fit$imputations$imp19$sus_ini_mod_mvv,
       amelia_fit$imputations$imp20$sus_ini_mod_mvv,
       amelia_fit$imputations$imp21$sus_ini_mod_mvv,
       amelia_fit$imputations$imp22$sus_ini_mod_mvv,
       amelia_fit$imputations$imp23$sus_ini_mod_mvv,
       amelia_fit$imputations$imp24$sus_ini_mod_mvv,
       amelia_fit$imputations$imp25$sus_ini_mod_mvv,
       amelia_fit$imputations$imp26$sus_ini_mod_mvv,
       amelia_fit$imputations$imp27$sus_ini_mod_mvv,
       amelia_fit$imputations$imp28$sus_ini_mod_mvv,
       amelia_fit$imputations$imp29$sus_ini_mod_mvv,
       amelia_fit$imputations$imp30$sus_ini_mod_mvv
       ) 

sus_ini_mod_mvv_imputed<-
sus_ini_mod_mvv_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Marijuana",as.character(.))~1,TRUE~0), .names="mar_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Alcohol",as.character(.))~1,TRUE~0), .names="oh_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine paste",as.character(.))~1,TRUE~0), .names="pb_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Cocaine hydrochloride",as.character(.))~1,TRUE~0), .names="coc_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.sus_ini_mod_mvv:amelia_fit.imputations.imp30.sus_ini_mod_mvv),~dplyr::case_when(grepl("Other",as.character(.))~1,TRUE~0), .names="otr_{col}"))%>%
        dplyr::mutate(sus_ini_mod_mvv_mar = base::rowSums(dplyr::select(., starts_with("mar_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_oh = base::rowSums(dplyr::select(., starts_with("oh_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_pb = base::rowSums(dplyr::select(., starts_with("pb_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_coc = base::rowSums(dplyr::select(., starts_with("coc_"))))%>%
  dplyr::mutate(sus_ini_mod_mvv_otr = base::rowSums(dplyr::select(., starts_with("otr_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_mar>0~1,TRUE~0)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_oh>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_pb>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_coc>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_tot=dplyr::case_when(sus_ini_mod_mvv_otr>0~sus_ini_mod_mvv_tot+1,TRUE~sus_ini_mod_mvv_tot)) %>% 
  dplyr::mutate(sus_ini_mod_mvv_to_imputation=dplyr::case_when(sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot==1 & sus_ini_mod_mvv_otr>0~"Other",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_pb>0~"Cocaine paste",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_coc>0~"Cocaine hydrochloride",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_mar>0~"Marijuana",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_oh>0~"Alcohol",sus_ini_mod_mvv_tot>1 & sus_ini_mod_mvv_otr>0~"Other")) %>% 
  janitor::clean_names()

sus_ini_mod_mvv_imputed<-
dplyr::select(sus_ini_mod_mvv_imputed,amelia_fit_imputations_imp1_row,sus_ini_mod_mvv_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss2<-
CONS_C1_df_dup_SEP_2020_match_miss1 %>% 
   dplyr::left_join(sus_ini_mod_mvv_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(sus_ini_mod_mvv=factor(dplyr::case_when(is.na(sus_ini_mod_mvv)~as.character(sus_ini_mod_mvv_to_imputation),
                                 TRUE~as.character(sus_ini_mod_mvv)))) %>% 
  dplyr::select(-sus_ini_mod_mvv_to_imputation) %>% 
  data.table()
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#
#_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_##_#_#_#_#_#_#__#_##_#_#_#_#_#_#_#_#_#_#_#_#__#_##_#_#_#_#_#


Frequency of Use of the Primary Drug at Admission

Another variable that is worth imputing is the Frequency of use of primary drug at admission (n= 568). In case of ties, we selected the imputed values with the value with the most frequent drug use.


# Ver distintos valores propuestos para sustancia de inciio
freq_cons_sus_prin_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$freq_cons_sus_prin,
       amelia_fit$imputations$imp2$freq_cons_sus_prin,
       amelia_fit$imputations$imp3$freq_cons_sus_prin,
       amelia_fit$imputations$imp4$freq_cons_sus_prin,
       amelia_fit$imputations$imp5$freq_cons_sus_prin,
       amelia_fit$imputations$imp6$freq_cons_sus_prin,
       amelia_fit$imputations$imp7$freq_cons_sus_prin,
       amelia_fit$imputations$imp8$freq_cons_sus_prin,
       amelia_fit$imputations$imp9$freq_cons_sus_prin,
       amelia_fit$imputations$imp10$freq_cons_sus_prin,
       amelia_fit$imputations$imp11$freq_cons_sus_prin,
       amelia_fit$imputations$imp12$freq_cons_sus_prin,
       amelia_fit$imputations$imp13$freq_cons_sus_prin,
       amelia_fit$imputations$imp14$freq_cons_sus_prin,
       amelia_fit$imputations$imp15$freq_cons_sus_prin,
       amelia_fit$imputations$imp16$freq_cons_sus_prin,
       amelia_fit$imputations$imp17$freq_cons_sus_prin,
       amelia_fit$imputations$imp18$freq_cons_sus_prin,
       amelia_fit$imputations$imp19$freq_cons_sus_prin,
       amelia_fit$imputations$imp20$freq_cons_sus_prin,
       amelia_fit$imputations$imp21$freq_cons_sus_prin,
       amelia_fit$imputations$imp22$freq_cons_sus_prin,
       amelia_fit$imputations$imp23$freq_cons_sus_prin,
       amelia_fit$imputations$imp24$freq_cons_sus_prin,
       amelia_fit$imputations$imp25$freq_cons_sus_prin,
       amelia_fit$imputations$imp26$freq_cons_sus_prin,
       amelia_fit$imputations$imp27$freq_cons_sus_prin,
       amelia_fit$imputations$imp28$freq_cons_sus_prin,
       amelia_fit$imputations$imp29$freq_cons_sus_prin,
       amelia_fit$imputations$imp30$freq_cons_sus_prin
       ) 

freq_cons_sus_prin_imputed<-
freq_cons_sus_prin_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("1 day a week or more",as.character(.))~1,TRUE~0), .names="1_day_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("2 to 3 days a week",as.character(.))~1,TRUE~0), .names="2_3_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("4 to 6 days a week",as.character(.))~1,TRUE~0), .names="4_6_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Less than 1 day a week",as.character(.))~1,TRUE~0), .names="less_1_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Did not use",as.character(.))~1,TRUE~0), .names="did_not_{col}"))%>%
    dplyr::mutate(across(c(amelia_fit.imputations.imp1.freq_cons_sus_prin:amelia_fit.imputations.imp30.freq_cons_sus_prin),~dplyr::case_when(grepl("Daily",as.character(.))~1,TRUE~0), .names="daily_{col}"))%>%
  dplyr::mutate(freq_cons_sus_prin_daily = base::rowSums(dplyr::select(., starts_with("daily_")))) %>% 
  dplyr::mutate(freq_cons_sus_prin_4_6 = base::rowSums(dplyr::select(., starts_with("4_6_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_2_3 = base::rowSums(dplyr::select(., starts_with("2_3_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_1_day = base::rowSums(dplyr::select(., starts_with("1_day_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_less_1 = base::rowSums(dplyr::select(., starts_with("less_1_"))))%>%
  dplyr::mutate(freq_cons_sus_prin_did_not = base::rowSums(dplyr::select(., starts_with("did_not_")))) %>% 
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_1_day>0~1,TRUE~0)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_2_3>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_4_6>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_less_1>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_did_not>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  dplyr::mutate(freq_cons_sus_prin_tot=dplyr::case_when(freq_cons_sus_prin_daily>0~freq_cons_sus_prin_tot+1,TRUE~freq_cons_sus_prin_tot)) %>% 
  #hierarchy
  dplyr::mutate(freq_cons_sus_prin_to_imputation=
                  dplyr::case_when(freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_daily>0~"Daily",
                                     freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot==1 & freq_cons_sus_prin_did_not>0~"Did not use",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_daily>0~"Daily",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_4_6>0~"4 to 6 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_2_3>0~"2 to 3 days a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_1_day>0~"1 day a week or more",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_less_1>0~"Less than 1 day a week",freq_cons_sus_prin_tot>1 & freq_cons_sus_prin_did_not>0~"Did not use")) %>% 
  janitor::clean_names()

freq_cons_sus_prin_imputed<-
dplyr::select(freq_cons_sus_prin_imputed,amelia_fit_imputations_imp1_row,freq_cons_sus_prin_to_imputation)

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss3<-
CONS_C1_df_dup_SEP_2020_match_miss2 %>% 
   dplyr::left_join(freq_cons_sus_prin_imputed, by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(freq_cons_sus_prin=factor(dplyr::case_when(is.na(freq_cons_sus_prin)~as.character(freq_cons_sus_prin_to_imputation), TRUE~as.character(freq_cons_sus_prin)))) %>% 
  data.table()


Educational Attainment

Another variable that is worth imputing is the Educational Attainment (n= 437). we followed the rules stated in Duplicates4 process (link). We were particularly cautious to impute attainments that would follow a progression from primary school to more than high school. For this purpose, we first looked over the actual values per user, filling intermediate gaps in educational attainment in users with intermediate null values (a), we overcame with the difficulty of the incorrect imputations, by logically selecting if there were any .


# Ver distintos valores propuestos para sustancia de inciio
escolaridad_rec_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
                  amelia_fit$imputations$imp1$hash_key,
                  amelia_fit$imputations$imp1$fech_ing_num,
                  amelia_fit$imputations$imp1$escolaridad_rec,
                  amelia_fit$imputations$imp2$escolaridad_rec,
                  amelia_fit$imputations$imp3$escolaridad_rec,
                  amelia_fit$imputations$imp4$escolaridad_rec,
                  amelia_fit$imputations$imp5$escolaridad_rec,
                  amelia_fit$imputations$imp6$escolaridad_rec,
                  amelia_fit$imputations$imp7$escolaridad_rec,
                  amelia_fit$imputations$imp8$escolaridad_rec,
                  amelia_fit$imputations$imp9$escolaridad_rec,
                  amelia_fit$imputations$imp10$escolaridad_rec,
                  amelia_fit$imputations$imp11$escolaridad_rec,
                  amelia_fit$imputations$imp12$escolaridad_rec,
                  amelia_fit$imputations$imp13$escolaridad_rec,
                  amelia_fit$imputations$imp14$escolaridad_rec,
                  amelia_fit$imputations$imp15$escolaridad_rec,
                  amelia_fit$imputations$imp16$escolaridad_rec,
                  amelia_fit$imputations$imp17$escolaridad_rec,
                  amelia_fit$imputations$imp18$escolaridad_rec,
                  amelia_fit$imputations$imp19$escolaridad_rec,
                  amelia_fit$imputations$imp20$escolaridad_rec,
                  amelia_fit$imputations$imp21$escolaridad_rec,
                  amelia_fit$imputations$imp22$escolaridad_rec,
                  amelia_fit$imputations$imp23$escolaridad_rec,
                  amelia_fit$imputations$imp24$escolaridad_rec,
                  amelia_fit$imputations$imp25$escolaridad_rec,
                  amelia_fit$imputations$imp26$escolaridad_rec,
                  amelia_fit$imputations$imp27$escolaridad_rec,
                  amelia_fit$imputations$imp28$escolaridad_rec,
                  amelia_fit$imputations$imp29$escolaridad_rec,
                  amelia_fit$imputations$imp30$escolaridad_rec) 

escolaridad_rec_imputed2<-
escolaridad_rec_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("3-Completed primary school or less",as.character(.))~1,TRUE~0), .names="3_primary_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("2-Completed high school or less",as.character(.))~1,TRUE~0), .names="2_high_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.escolaridad_rec:amelia_fit.imputations.imp30.escolaridad_rec),~dplyr::case_when(grepl("1-More than high school",as.character(.))~1,TRUE~0), .names="1_more_high_{col}")) %>% 

  dplyr::mutate(escolaridad_rec_3_primary = base::rowSums(dplyr::select(., contains("3_primary_")))) %>% 
  dplyr::mutate(escolaridad_rec_2_high = base::rowSums(dplyr::select(., contains("2_high_"))))%>%
  dplyr::mutate(escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., contains("1_more_high_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#create an ordered index of the number of treatments by user
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

#:#:#:#;#;#;
CONS_C1_df_dup_SEP_2020_match_rn<-
    CONS_C1_df_dup_SEP_2020_match_miss %>%  #base de datos original, sin imputaciones
    dplyr::group_by(hash_key) %>% 
    dplyr::mutate(rn=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::select(rn)
#:#:#:#;#;#;
escolaridad_rec_imputed3<-
escolaridad_rec_imputed2 %>%   
  dplyr::left_join(cbind.data.frame(CONS_C1_df_dup_SEP_2020_match_miss$row, CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec,CONS_C1_df_dup_SEP_2020_match_rn$rn),by=c("amelia_fit.imputations.imp1.row"="CONS_C1_df_dup_SEP_2020_match_miss$row")) %>%
  dplyr::rename("escolaridad_rec_original"="CONS_C1_df_dup_SEP_2020_match_miss$escolaridad_rec") %>%
  dplyr::mutate(escolaridad_rec_original=as.numeric(substr(escolaridad_rec_original, 1, 1))) %>%
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  #ordenar por tratamientos por usuario
  #:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::arrange(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`) %>% 
  dplyr::group_by(amelia_fit.imputations.imp1.hash_key) %>%  
  dplyr::mutate(siguiente_escolaridad_rec_original=lead(escolaridad_rec_original), 
                subsig_escolaridad_rec_original=lead(escolaridad_rec_original,n =2), 
                rn=max(`CONS_C1_df_dup_SEP_2020_match_rn$rn`),
                n_na_esc_or=is.na(escolaridad_rec_original),
                sum_n_na_esc_or=sum(n_na_esc_or,na.rm=T),
                max_sum_n_na_esc_or=max(n_na_esc_or,na.rm=T)
                ) %>% 
#dplyr::select(amelia_fit.imputations.imp1.hash_key,amelia_fit.imputations.imp30.rn,
#              siguiente_escolaridad_rec_original,escolaridad_rec_original,amelia_fit.imputations.imp1.fech_ing_num)%>% View()
  dplyr::ungroup()

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#PREPARACIƓN  BASE DE DATOS PARA IMPUTACION Y CREACIƓN DE VARIABLES PARA CONDICIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed4 <-
escolaridad_rec_imputed3 %>% 
  dplyr::select(amelia_fit.imputations.imp1.hash_key,`CONS_C1_df_dup_SEP_2020_match_rn$rn`,escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high, escolaridad_rec_1_more_high) %>%
  dplyr::rename("hash_key"="amelia_fit.imputations.imp1.hash_key") %>% 
  dplyr::rename("treat_no_for_usr"="CONS_C1_df_dup_SEP_2020_match_rn$rn") %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(treat_per_usr=max(treat_no_for_usr,na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  tidyr::pivot_wider(names_from=treat_no_for_usr,
                     #names_glue = "ord_treat_esc_{.value}",
                     values_from=c(escolaridad_rec_original,escolaridad_rec_3_primary,escolaridad_rec_2_high,escolaridad_rec_1_more_high),values_fill = NA) %>% 
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#Ver si existen inconsistencias en la escolaridad, pero no sólo inconsistencias inmediatas, sino con hasta 2 espacios entre tratamientos
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_tot_cond=dplyr::case_when(
    (escolaridad_rec_original_10>escolaridad_rec_original_9)|(escolaridad_rec_original_10>escolaridad_rec_original_8)|(escolaridad_rec_original_10>escolaridad_rec_original_7)|
      (escolaridad_rec_original_9>escolaridad_rec_original_8)|(escolaridad_rec_original_9>escolaridad_rec_original_7)|(escolaridad_rec_original_9>escolaridad_rec_original_6)|
      (escolaridad_rec_original_8>escolaridad_rec_original_7)|(escolaridad_rec_original_8>escolaridad_rec_original_6)|(escolaridad_rec_original_8>escolaridad_rec_original_5)|
      (escolaridad_rec_original_7>escolaridad_rec_original_6)|(escolaridad_rec_original_7>escolaridad_rec_original_5)|(escolaridad_rec_original_7>escolaridad_rec_original_4)|
      (escolaridad_rec_original_6>escolaridad_rec_original_5)|(escolaridad_rec_original_6>escolaridad_rec_original_4)|(escolaridad_rec_original_6>escolaridad_rec_original_3)|
      (escolaridad_rec_original_5>escolaridad_rec_original_4)|(escolaridad_rec_original_5>escolaridad_rec_original_3)|(escolaridad_rec_original_5>escolaridad_rec_original_2)|
      (escolaridad_rec_original_4>escolaridad_rec_original_3)|(escolaridad_rec_original_4>escolaridad_rec_original_2)|(escolaridad_rec_original_4>escolaridad_rec_original_1)|
      (escolaridad_rec_original_3>escolaridad_rec_original_2)|(escolaridad_rec_original_3>escolaridad_rec_original_1)|
      (escolaridad_rec_original_2>escolaridad_rec_original_1)~1,TRUE~0)) %>% 
  #dplyr::filter(escolaridad_rec_tot_cond==1) %>% #View() #0 rows Āæy 374745c85601976177fe614a7370e475?
  #dplyr::filter(treat_per_usr>1) %>% 
  #:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  # Ver si hay valores de escolaridad ausentes en una progresión de tratamientos
  #:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio=dplyr::case_when(
      (sum_nas_esc>10 & treat_per_usr==10)|
      (sum_nas_esc>1 & treat_per_usr==9)|
      (sum_nas_esc>2 & treat_per_usr==8)|
      (sum_nas_esc>3 & treat_per_usr==7)|
      (sum_nas_esc>4 & treat_per_usr==6)|
      (sum_nas_esc>5 & treat_per_usr==5)|
      (sum_nas_esc>6 & treat_per_usr==4)|
      (sum_nas_esc>7 & treat_per_usr==3)|
      (sum_nas_esc>8 & treat_per_usr==2)|
      (sum_nas_esc>9 & treat_per_usr==1)~1,TRUE~0)) %>% #18b1f9646a2cd6bebd962637cff0a21a 5 casos
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  #Generar la escolaridad al final
  #:#:#:#:#:#:#:#:#
  dplyr::mutate(last_esc=dplyr::case_when(treat_per_usr==10~escolaridad_rec_original_10,
                                          treat_per_usr==9~escolaridad_rec_original_9,
                                          treat_per_usr==8~escolaridad_rec_original_8,
                                          treat_per_usr==7~escolaridad_rec_original_7,
                                          treat_per_usr==6~escolaridad_rec_original_6,
                                          treat_per_usr==5~escolaridad_rec_original_5,
                                          treat_per_usr==4~escolaridad_rec_original_4,
                                          treat_per_usr==3~escolaridad_rec_original_3,
                                          treat_per_usr==2~escolaridad_rec_original_2,
                                          treat_per_usr==1~escolaridad_rec_original_1)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a0))si valor final vs. inicial son iguales, imputar todo lo que estĆ” en medio con el mismo valor
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_9=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>9 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_9)) %>% 
  dplyr::mutate(escolaridad_rec_original_8=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>8 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_8)) %>% 
  dplyr::mutate(escolaridad_rec_original_7=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>7 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_7)) %>% 
  dplyr::mutate(escolaridad_rec_original_6=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>6 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_6)) %>% 
  dplyr::mutate(escolaridad_rec_original_5=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>5 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_5)) %>% 
  dplyr::mutate(escolaridad_rec_original_4=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>4 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_4)) %>% 
  dplyr::mutate(escolaridad_rec_original_3=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>3 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_3)) %>% 
  dplyr::mutate(escolaridad_rec_original_2=
          dplyr::case_when(escolaridad_rec_original_1==last_esc & treat_per_usr>2 & !is.na(escolaridad_rec_original_1)~escolaridad_rec_original_1,
                           TRUE~escolaridad_rec_original_2)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#a1))cambiar valores vacĆ­os intermedios  /// fijarse en  & escolaridad_rec_tot_cond==1
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#reemplazar el nĆŗmero intermedio por cada tratamiento para cada usuario
  dplyr::mutate(escolaridad_rec_original_9=dplyr::case_when(escolaridad_rec_original_8==escolaridad_rec_original_10 & is.na(escolaridad_rec_original_9)&!is.na(escolaridad_rec_original_10)~escolaridad_rec_original_10,TRUE~escolaridad_rec_original_9)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_10)) %>% View()
  dplyr::mutate(escolaridad_rec_original_8=dplyr::case_when(escolaridad_rec_original_7==escolaridad_rec_original_9 & is.na(escolaridad_rec_original_8)&!is.na(escolaridad_rec_original_9)~escolaridad_rec_original_9,TRUE~escolaridad_rec_original_8)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_9)) %>% View()
  dplyr::mutate(escolaridad_rec_original_7=dplyr::case_when(escolaridad_rec_original_6==escolaridad_rec_original_8 & is.na(escolaridad_rec_original_7)&!is.na(escolaridad_rec_original_8)~escolaridad_rec_original_8 ,TRUE~escolaridad_rec_original_7)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_8)) %>% View()
  dplyr::mutate(escolaridad_rec_original_6=dplyr::case_when(escolaridad_rec_original_5==escolaridad_rec_original_7& is.na(escolaridad_rec_original_6)&!is.na(escolaridad_rec_original_7)~escolaridad_rec_original_7,TRUE~escolaridad_rec_original_6)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_7)) %>% View()
  dplyr::mutate(escolaridad_rec_original_5=dplyr::case_when(escolaridad_rec_original_4==escolaridad_rec_original_6  & is.na(escolaridad_rec_original_5)&!is.na(escolaridad_rec_original_6)~escolaridad_rec_original_6,TRUE~escolaridad_rec_original_5)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_6)) %>% View()
  dplyr::mutate(escolaridad_rec_original_4=dplyr::case_when(escolaridad_rec_original_3==escolaridad_rec_original_5  & is.na(escolaridad_rec_original_4)&!is.na(escolaridad_rec_original_5)~escolaridad_rec_original_5,TRUE~escolaridad_rec_original_4)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_5)) %>% View()
  dplyr::mutate(escolaridad_rec_original_3=dplyr::case_when(escolaridad_rec_original_2==escolaridad_rec_original_4  & is.na(escolaridad_rec_original_3)&!is.na(escolaridad_rec_original_4)~escolaridad_rec_original_4,TRUE~escolaridad_rec_original_3)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_4)) %>% View()
  dplyr::mutate(escolaridad_rec_original_2=dplyr::case_when(escolaridad_rec_original_1==escolaridad_rec_original_3  & is.na(escolaridad_rec_original_2)&!is.na(escolaridad_rec_original_3)~escolaridad_rec_original_3,TRUE~escolaridad_rec_original_2)) %>% 
  # dplyr::filter(!is.na(escolaridad_rec_original_3)) %>% View()
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a2))si tiene información en la segunda pero no en la primera, y no es un valor intermedio como secundaria completa (ya que en ese caso puede adoptar mÔs de un valor: mÔs o igual a ese valor), imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==3~3,
                                                            escolaridad_rec_original_2==1~1,
                                                            TRUE~escolaridad_rec_original_1)) %>% 
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
##a3))si hay mÔs de 2 tratamientos por usuarios, y tiene información en la segunda pero no en la primera, y es un valor intermedio pero tiene un tercer tratamiento con el mismo valor, imputarlo
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
    dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(escolaridad_rec_original_2==2 & escolaridad_rec_original_3==2~3,TRUE~escolaridad_rec_original_1))  %>% 

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#medidas para capturar inconsistencias a lo largo de todos los tratamientos de cada usuario
#escolaridad_rec_imputed4 %>% #escolaridad_rec_tot_cond
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==1~1,TRUE~0), .names="1_more_high_{col}")) %>% 
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==2~1,TRUE~0), .names="2_high_{col}")) %>% 
  dplyr::mutate(across(c(escolaridad_rec_original_1:escolaridad_rec_original_10),~dplyr::case_when(.==3~1,TRUE~0), .names="3_primary_{col}")) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_1_more_high = base::rowSums(dplyr::select(., starts_with("1_more_high_")))) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_2_high = base::rowSums(dplyr::select(., starts_with("2_high_")))) %>% 
  dplyr::mutate(suma_vals_escolaridad_rec_3_primary = base::rowSums(dplyr::select(., starts_with("3_primary_"))))

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#IMPUTACIONES
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
escolaridad_rec_imputed5<-
escolaridad_rec_imputed4 %>% 
  #hacer una suma de mÔs NA's de los que debería tener según la cantidad de tratamientos que tiene la persona
  #:#:#:#:#:#:#:#:#:
  dplyr::mutate(sum_nas_esc_post=base::rowSums(is.na(dplyr::select(., starts_with("escolaridad_rec_original_")))))%>%
  dplyr::mutate(escolaridad_rec_tot_nas_en_medio_post=dplyr::case_when(
      (sum_nas_esc_post>10 & treat_per_usr==10)|
      (sum_nas_esc_post>1 & treat_per_usr==9)|
      (sum_nas_esc_post>2 & treat_per_usr==8)|
      (sum_nas_esc_post>3 & treat_per_usr==7)|
      (sum_nas_esc_post>4 & treat_per_usr==6)|
      (sum_nas_esc_post>5 & treat_per_usr==5)|
      (sum_nas_esc_post>6 & treat_per_usr==4)|
      (sum_nas_esc_post>7 & treat_per_usr==3)|
      (sum_nas_esc_post>8 & treat_per_usr==2)|
      (sum_nas_esc_post>9 & treat_per_usr==1)~1,TRUE~0)) %>%
  #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #d864967fa0b1c5bb1d4eb5f6a7c8c2c1
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b0))valor inicial y sólo un tratamiento, se imputa por el valor imputado mÔs frecuente de las 30 bases de datos
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)~3,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_2_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_2_high_1>escolaridad_rec_1_more_high_1)~2,
    is.na(escolaridad_rec_original_1) & treat_per_usr==1 & 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_3_primary_1)& 
      (escolaridad_rec_1_more_high_1>escolaridad_rec_2_high_1)~1,
    TRUE~escolaridad_rec_original_1)) %>% 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b1))valor en el segundo tratamiento es intermedio, inicial se imputa, dependiendo si primaria es mayor que intermedio o no
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  ###
  #dplyr::filter(is.na(escolaridad_rec_original_1),!is.na(escolaridad_rec_original_2)) %>%
  #dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1,escolaridad_rec_1_more_high_1) %>% View()
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#

  dplyr::mutate(escolaridad_rec_original_1=dplyr::case_when(
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1>escolaridad_rec_2_high_1)~3,
    is.na(escolaridad_rec_original_1) & escolaridad_rec_original_2==2 & (escolaridad_rec_3_primary_1<escolaridad_rec_2_high_1)~2,TRUE~escolaridad_rec_original_1))%>%
    #dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
#610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacĆ­o, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
  dplyr::mutate(escolaridad_rec_original_10= dplyr::case_when(
  #
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==2 & 
      (escolaridad_rec_1_more_high_10<escolaridad_rec_2_high_10)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_1_more_high_10>escolaridad_rec_2_high_10) & (escolaridad_rec_1_more_high_10>escolaridad_rec_3_primary_10)~1,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
        (escolaridad_rec_2_high_10 >escolaridad_rec_1_more_high_10) & (escolaridad_rec_2_high_10>escolaridad_rec_3_primary_10)~2,
    treat_per_usr==10 & is.na(escolaridad_rec_original_10) &  escolaridad_rec_original_9==3 & 
      (escolaridad_rec_3_primary_10 >escolaridad_rec_2_high_10) & (escolaridad_rec_3_primary_10>escolaridad_rec_1_more_high_10)~2,TRUE~escolaridad_rec_original_10)) %>% 
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
  #
    dplyr::mutate(escolaridad_rec_original_9= dplyr::case_when(
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==2 & 
      (escolaridad_rec_1_more_high_9<escolaridad_rec_2_high_9)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_1_more_high_9>escolaridad_rec_2_high_9) & (escolaridad_rec_1_more_high_9>escolaridad_rec_3_primary_9)~1,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
        (escolaridad_rec_2_high_9 >escolaridad_rec_1_more_high_9) & (escolaridad_rec_2_high_9>escolaridad_rec_3_primary_9)~2,
    treat_per_usr==9 & is.na(escolaridad_rec_original_9) &  escolaridad_rec_original_8==3 & 
      (escolaridad_rec_3_primary_9 >escolaridad_rec_2_high_9) & (escolaridad_rec_3_primary_9>escolaridad_rec_1_more_high_9)~2,TRUE~escolaridad_rec_original_9)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_8= dplyr::case_when(
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==2 & 
      (escolaridad_rec_1_more_high_8<escolaridad_rec_2_high_8)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_1_more_high_8>escolaridad_rec_2_high_8) & (escolaridad_rec_1_more_high_8>escolaridad_rec_3_primary_8)~1,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
        (escolaridad_rec_2_high_8 >escolaridad_rec_1_more_high_8) & (escolaridad_rec_2_high_8>escolaridad_rec_3_primary_8)~2,
    treat_per_usr==8 & is.na(escolaridad_rec_original_8) &  escolaridad_rec_original_7==3 & 
      (escolaridad_rec_3_primary_8 >escolaridad_rec_2_high_8) & (escolaridad_rec_3_primary_8>escolaridad_rec_1_more_high_8)~2,TRUE~escolaridad_rec_original_8)) %>% 
  #
        dplyr::mutate(escolaridad_rec_original_7= dplyr::case_when(
          #si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==2 & 
      (escolaridad_rec_1_more_high_7<escolaridad_rec_2_high_7)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_1_more_high_7>escolaridad_rec_2_high_7) & (escolaridad_rec_1_more_high_7>escolaridad_rec_3_primary_7)~1,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
        (escolaridad_rec_2_high_7 >escolaridad_rec_1_more_high_7) & (escolaridad_rec_2_high_7>escolaridad_rec_3_primary_7)~2,
    treat_per_usr==7 & is.na(escolaridad_rec_original_7) &  escolaridad_rec_original_6==3 & 
      (escolaridad_rec_3_primary_7 >escolaridad_rec_2_high_7) & (escolaridad_rec_3_primary_7>escolaridad_rec_1_more_high_7)~2,TRUE~escolaridad_rec_original_7)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_6= dplyr::case_when(
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==2 & 
      (escolaridad_rec_1_more_high_6<escolaridad_rec_2_high_6)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_1_more_high_6>escolaridad_rec_2_high_6) & (escolaridad_rec_1_more_high_6>escolaridad_rec_3_primary_6)~1,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
        (escolaridad_rec_2_high_6 >escolaridad_rec_1_more_high_6) & (escolaridad_rec_2_high_6>escolaridad_rec_3_primary_6)~2,
    treat_per_usr==6 & is.na(escolaridad_rec_original_6) &  escolaridad_rec_original_5==3 & 
      (escolaridad_rec_3_primary_6 >escolaridad_rec_2_high_6) & (escolaridad_rec_3_primary_6>escolaridad_rec_1_more_high_6)~2,TRUE~escolaridad_rec_original_6)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_5= dplyr::case_when(
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==2 & 
      (escolaridad_rec_1_more_high_5<escolaridad_rec_2_high_5)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_1_more_high_5>escolaridad_rec_2_high_5) & (escolaridad_rec_1_more_high_5>escolaridad_rec_3_primary_5)~1,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
        (escolaridad_rec_2_high_5 >escolaridad_rec_1_more_high_5) & (escolaridad_rec_2_high_5>escolaridad_rec_3_primary_5)~2,
    treat_per_usr==5 & is.na(escolaridad_rec_original_5) &  escolaridad_rec_original_4==3 & 
      (escolaridad_rec_3_primary_5 >escolaridad_rec_2_high_5) & (escolaridad_rec_3_primary_5>escolaridad_rec_1_more_high_5)~2,TRUE~escolaridad_rec_original_5)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_4= dplyr::case_when(
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_4<escolaridad_rec_2_high_4)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_1_more_high_4>escolaridad_rec_2_high_4) & (escolaridad_rec_1_more_high_4>escolaridad_rec_3_primary_4)~1,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
        (escolaridad_rec_2_high_4 >escolaridad_rec_1_more_high_4) & (escolaridad_rec_2_high_4>escolaridad_rec_3_primary_4)~2,
    treat_per_usr==4 & is.na(escolaridad_rec_original_4) &  escolaridad_rec_original_3==3 & 
      (escolaridad_rec_3_primary_4 >escolaridad_rec_2_high_4) & (escolaridad_rec_3_primary_4>escolaridad_rec_1_more_high_4)~2,TRUE~escolaridad_rec_original_4)) %>% 
  #
          dplyr::mutate(escolaridad_rec_original_3= dplyr::case_when(
#si la educación en el tratamiento anterior es la mÔxima, imputar con el mismo valor
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==1~1,
#si la educación en el tratamiento anterior es intermedio, ver cuÔl es el valor mÔs creible (conserva intermedio o logra universitario)    
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_3==2 & 
      (escolaridad_rec_1_more_high_3<escolaridad_rec_2_high_3)~2,
#si la educación en el tratamiento anterior es la mÔs baja, ver cuÔl es el valor mÔs creible (mantiene educación, logra intermedio o logra universitario)      
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_1_more_high_3>escolaridad_rec_2_high_3) & (escolaridad_rec_1_more_high_3>escolaridad_rec_3_primary_3)~1,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
        (escolaridad_rec_2_high_3 >escolaridad_rec_1_more_high_3) & (escolaridad_rec_2_high_3>escolaridad_rec_3_primary_3)~2,
    treat_per_usr==3 & is.na(escolaridad_rec_original_3) &  escolaridad_rec_original_2==3 & 
      (escolaridad_rec_3_primary_3 >escolaridad_rec_2_high_3) & (escolaridad_rec_3_primary_3>escolaridad_rec_1_more_high_3)~2,TRUE~escolaridad_rec_original_3))
#:#:#:#:
 # dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)
 #:#:#:#:
  #comprobar si en verdad calza:
  #%>%dplyr::filter(hash_key=="ef4325cda7ddd92f6218bb910c3e0895") %>% dplyr::select(escolaridad_rec_original_1,escolaridad_rec_original_2,treat_per_usr,escolaridad_rec_3_primary_1,escolaridad_rec_2_high_1)
  #610dd4dba4dbb62848691b6916828948
  #90d581cd11064c41b82f8e4d6ff7b70b
#escolaridad_rec_imputed5 %>% 
#    dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_3,escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_4)

#98d6644d995ea2c8777a683160728004
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#b2))Valor final es vacĆ­o, hay un valor anterior
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_ 
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_  
escolaridad_rec_imputed6<-
escolaridad_rec_imputed5 %>% 
#dplyr::filter(escolaridad_rec_tot_nas_en_medio_post>0,treat_per_usr>1)%>%dplyr::filter(hash_key=="98d6644d995ea2c8777a683160728004") %>% dplyr::select(escolaridad_rec_original_4,escolaridad_rec_original_4,treat_per_usr,escolaridad_rec_3_primary_4,escolaridad_rec_2_high_4,escolaridad_rec_1_more_high_3)
  dplyr::select(hash_key,starts_with("escolaridad_rec_original_")) %>%
  tidyr::pivot_longer(cols = starts_with("escolaridad_rec_original_"),
   names_to = "rn",
   names_prefix = "escolaridad_rec_original_") %>% 
  dplyr::filter(!is.na(value)) %>% 
  dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>% 
  dplyr::select(hash_rn,value)
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
CONS_C1_df_dup_SEP_2020_match_miss4<-
CONS_C1_df_dup_SEP_2020_match_miss3 %>%
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(rn=row_number()) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(hash_rn=paste0(hash_key,"_",rn)) %>% 
  dplyr::left_join(escolaridad_rec_imputed6, by=c("hash_rn")) %>% 
  dplyr::mutate(escolaridad_rec=dplyr::case_when(value==1~"1-More than high school",value==2~"2-Completed high school or less",value==3~"3-Completed primary school or less")) %>% 
  #
  dplyr::arrange(hash_key,rn) %>% 
  #dplyr::mutate(escolaridad_rec=dplyr::case_when(is.na(escolaridad_rec)~value,TRUE~as.character(escolaridad_rec))) %>% 
  dplyr::mutate(escolaridad_rec=parse_factor(as.character(escolaridad_rec),levels=c('3-Completed primary school or less', '2-Completed high school or less', '1-More than high school'), ordered =F,trim_ws=T,include_na =F, locale=locale(encoding = "Latin1"))) %>%
  dplyr::select(-value,-hash_rn) %>% 
  data.table()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
paste("Check inconsistencies with posterior educational attainments (0= No inconsistencies):",CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
  dplyr::arrange(hash_key,rn) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(escolaridad_rec_num=as.numeric(substr(escolaridad_rec, 1, 1)),
                sig_escolaridad_rec_num=lead(escolaridad_rec_num),
                ant_escolaridad_rec_num=lag(escolaridad_rec_num)) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(escolaridad_rec_num>ant_escolaridad_rec_num) %>% 
  dplyr::select(hash_key,rn,fech_ing_num, escolaridad_rec, escolaridad_rec_num, sig_escolaridad_rec_num,ant_escolaridad_rec_num) %>% 
  nrow())
## [1] "Check inconsistencies with posterior educational attainments (0= No inconsistencies): 0"


We ended having 241 missing values in educational attainment (users=238), because the imputed values did not fulfilled the requirements of a progression of the educational attainment (eg., a user could not respond to have completed secondary school, but then answer that he had completed primary school only), for example, due to ties in the imputed values or no imputed values.


Marital status

Additionally, we replaced missing values of the marital status (n=198). Since different marital status were not particularly more vulnerable between each other, we selected the most frequent imputed value among the different imputed databases.


# Ver distintos valores propuestos para estado conyugal
estado_conyugal_2_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$estado_conyugal_2,
       amelia_fit$imputations$imp2$estado_conyugal_2,
       amelia_fit$imputations$imp3$estado_conyugal_2,
       amelia_fit$imputations$imp4$estado_conyugal_2,
       amelia_fit$imputations$imp5$estado_conyugal_2,
       amelia_fit$imputations$imp6$estado_conyugal_2,
       amelia_fit$imputations$imp7$estado_conyugal_2,
       amelia_fit$imputations$imp8$estado_conyugal_2,
       amelia_fit$imputations$imp9$estado_conyugal_2,
       amelia_fit$imputations$imp10$estado_conyugal_2,
       amelia_fit$imputations$imp11$estado_conyugal_2,
       amelia_fit$imputations$imp12$estado_conyugal_2,
       amelia_fit$imputations$imp13$estado_conyugal_2,
       amelia_fit$imputations$imp14$estado_conyugal_2,
       amelia_fit$imputations$imp15$estado_conyugal_2,
       amelia_fit$imputations$imp16$estado_conyugal_2,
       amelia_fit$imputations$imp17$estado_conyugal_2,
       amelia_fit$imputations$imp18$estado_conyugal_2,
       amelia_fit$imputations$imp19$estado_conyugal_2,
       amelia_fit$imputations$imp20$estado_conyugal_2,
       amelia_fit$imputations$imp21$estado_conyugal_2,
       amelia_fit$imputations$imp22$estado_conyugal_2,
       amelia_fit$imputations$imp23$estado_conyugal_2,
       amelia_fit$imputations$imp24$estado_conyugal_2,
       amelia_fit$imputations$imp25$estado_conyugal_2,
       amelia_fit$imputations$imp26$estado_conyugal_2,
       amelia_fit$imputations$imp27$estado_conyugal_2,
       amelia_fit$imputations$imp28$estado_conyugal_2,
       amelia_fit$imputations$imp29$estado_conyugal_2,
       amelia_fit$imputations$imp30$estado_conyugal_2
       ) 

estado_conyugal_2_imputed<-
estado_conyugal_2_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Married/Shared living arrangements",as.character(.))~1,TRUE~0), .names="married_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Separated/Divorced",as.character(.))~1,TRUE~0), .names="sep_div_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Single",as.character(.))~1,TRUE~0), .names="singl_{col}"))%>%
  dplyr::mutate(across(c(amelia_fit.imputations.imp1.estado_conyugal_2:amelia_fit.imputations.imp30.estado_conyugal_2),~dplyr::case_when(grepl("Widower",as.character(.))~1,TRUE~0), .names="widow_{col}"))%>%
 
  dplyr::mutate(estado_conyugal_2_married = base::rowSums(dplyr::select(., starts_with("married_"))))%>%
  dplyr::mutate(estado_conyugal_2_sep_div = base::rowSums(dplyr::select(., starts_with("sep_div_"))))%>%
  dplyr::mutate(estado_conyugal_2_singl = base::rowSums(dplyr::select(., starts_with("singl_"))))%>%
  dplyr::mutate(estado_conyugal_2_wid = base::rowSums(dplyr::select(., starts_with("widow_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_married>0~1,TRUE~0)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_sep_div>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_singl>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  dplyr::mutate(estado_conyugal_2_tot=dplyr::case_when(estado_conyugal_2_wid>0~estado_conyugal_2_tot+1,TRUE~estado_conyugal_2_tot)) %>% 
  janitor::clean_names()
  
estado_conyugal_2_imputed_cat_est_cony<-  
    estado_conyugal_2_imputed %>%
        tidyr::pivot_longer(c(estado_conyugal_2_married, estado_conyugal_2_sep_div, estado_conyugal_2_singl, estado_conyugal_2_wid), names_to = "cat_est_conyugal", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(estado_conyugal_2_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(estado_conyugal_2_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_est_conyugal,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_est_conyugal=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_est_conyugal)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
estado_conyugal_2_imputed<-
  estado_conyugal_2_imputed %>% 
    dplyr::left_join(estado_conyugal_2_imputed_cat_est_cony, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_est_conyugal=dplyr::case_when(cat_est_conyugal=="estado_conyugal_2_married"~"Married/Shared living arrangements",cat_est_conyugal=="estado_conyugal_2_sep_div"~"Separated/Divorced",cat_est_conyugal=="estado_conyugal_2_singl"~"Single",cat_est_conyugal=="estado_conyugal_2_wid"~"Widower"
    ))%>% 
  janitor::clean_names()

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss5<-
CONS_C1_df_dup_SEP_2020_match_miss4 %>% 
   dplyr::left_join(dplyr::select(estado_conyugal_2_imputed,amelia_fit_imputations_imp1_row,cat_est_conyugal), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(estado_conyugal_2=factor(dplyr::case_when(is.na(estado_conyugal_2)~as.character(cat_est_conyugal),TRUE~as.character(estado_conyugal_2)))) %>% 
  data.table()

no_calzaron_estado_cony<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist()

#CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
#dplyr::filter(hash_key %in% CONS_C1_df_dup_SEP_2020_match_miss5 %>% dplyr::filter(is.na(estado_conyugal_2)) %>% dplyr::distinct(hash_key) %>% unlist())


We could not resolve Marital status in 14 cases due to ties in the most frequent values.


Region & Type of Center (Public)

We looked over possible imputations to region of the center (n=28) and type of the center (public or private) (n=28).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

#no hay información. debemos imputar
no_mostrar=0
if (no_mostrar==1){
tipo_centro_nombre_region_nas_nombre_region<-
CONS_C1_df_dup_SEP_2020 %>% 
    #dplyr::filter(row %in% unlist(unique(CONS_C1_df_dup_SEP_2020_match[,"row"]))) %>% 
    dplyr::filter(is.na(nombre_region)) %>% 
    janitor::tabyl(tipo_centro, nombre_region) 
}

nombre_region_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$nombre_region,
       amelia_fit$imputations$imp2$nombre_region,
       amelia_fit$imputations$imp3$nombre_region,
       amelia_fit$imputations$imp4$nombre_region,
       amelia_fit$imputations$imp5$nombre_region,
       amelia_fit$imputations$imp6$nombre_region,
       amelia_fit$imputations$imp7$nombre_region,
       amelia_fit$imputations$imp8$nombre_region,
       amelia_fit$imputations$imp9$nombre_region,
       amelia_fit$imputations$imp10$nombre_region,
       amelia_fit$imputations$imp11$nombre_region,
       amelia_fit$imputations$imp12$nombre_region,
       amelia_fit$imputations$imp13$nombre_region,
       amelia_fit$imputations$imp14$nombre_region,
       amelia_fit$imputations$imp15$nombre_region,
       amelia_fit$imputations$imp16$nombre_region,
       amelia_fit$imputations$imp17$nombre_region,
       amelia_fit$imputations$imp18$nombre_region,
       amelia_fit$imputations$imp19$nombre_region,
       amelia_fit$imputations$imp20$nombre_region,
       amelia_fit$imputations$imp21$nombre_region,
       amelia_fit$imputations$imp22$nombre_region,
       amelia_fit$imputations$imp23$nombre_region,
       amelia_fit$imputations$imp24$nombre_region,
       amelia_fit$imputations$imp25$nombre_region,
       amelia_fit$imputations$imp26$nombre_region,
       amelia_fit$imputations$imp27$nombre_region,
       amelia_fit$imputations$imp28$nombre_region,
       amelia_fit$imputations$imp29$nombre_region,
       amelia_fit$imputations$imp30$nombre_region
       ) 
nombre_region_imputed<-
nombre_region_imputed %>% 
  data.frame() %>% 
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Antofagasta",as.character(.))~1,TRUE~0), .names="reg_02_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Araucan",as.character(.))~1,TRUE~0), .names="reg_09_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Arica",as.character(.))~1,TRUE~0), .names="reg_15_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Atacama",as.character(.))~1,TRUE~0), .names="reg_03_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Ays",as.character(.))~1,TRUE~0), .names="reg_11_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Biob",as.character(.))~1,TRUE~0), .names="reg_08_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Coquimbo",as.character(.))~1,TRUE~0), .names="reg_04_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los Lagos",as.character(.))~1,TRUE~0), .names="reg_10_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Los R",as.character(.))~1,TRUE~0), .names="reg_14_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Magallanes",as.character(.))~1,TRUE~0), .names="reg_12_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Maule",as.character(.))~1,TRUE~0), .names="reg_07_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Metropolitana",as.character(.))~1,TRUE~0), .names="reg_13_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("uble",as.character(.))~1,TRUE~0), .names="reg_16_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Higgins",as.character(.))~1,TRUE~0), .names="reg_06_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Tarapac",as.character(.))~1,TRUE~0), .names="reg_01_{col}"))%>%
dplyr::mutate(across(c(amelia_fit.imputations.imp1.nombre_region:amelia_fit.imputations.imp30.nombre_region),~dplyr::case_when(grepl("Valpara",as.character(.))~1,TRUE~0), .names="reg_05_{col}"))%>%
  
 
  dplyr::mutate(nombre_region_02 = base::rowSums(dplyr::select(., starts_with("reg_02_"))))%>%
  dplyr::mutate(nombre_region_09 = base::rowSums(dplyr::select(., starts_with("reg_09_"))))%>%
  dplyr::mutate(nombre_region_15 = base::rowSums(dplyr::select(., starts_with("reg_15_"))))%>%
  dplyr::mutate(nombre_region_03 = base::rowSums(dplyr::select(., starts_with("reg_03_"))))%>%
  dplyr::mutate(nombre_region_11 = base::rowSums(dplyr::select(., starts_with("reg_11_"))))%>%
  dplyr::mutate(nombre_region_08 = base::rowSums(dplyr::select(., starts_with("reg_08_"))))%>%
  dplyr::mutate(nombre_region_04 = base::rowSums(dplyr::select(., starts_with("reg_04_"))))%>%
  dplyr::mutate(nombre_region_10 = base::rowSums(dplyr::select(., starts_with("reg_10_"))))%>%
  dplyr::mutate(nombre_region_14 = base::rowSums(dplyr::select(., starts_with("reg_14_"))))%>%
  dplyr::mutate(nombre_region_12 = base::rowSums(dplyr::select(., starts_with("reg_12_"))))%>%
  dplyr::mutate(nombre_region_07 = base::rowSums(dplyr::select(., starts_with("reg_07_"))))%>%
  dplyr::mutate(nombre_region_13 = base::rowSums(dplyr::select(., starts_with("reg_13_"))))%>%
  dplyr::mutate(nombre_region_16 = base::rowSums(dplyr::select(., starts_with("reg_16_"))))%>%
  dplyr::mutate(nombre_region_06 = base::rowSums(dplyr::select(., starts_with("reg_06_"))))%>%
  dplyr::mutate(nombre_region_01 = base::rowSums(dplyr::select(., starts_with("reg_01_"))))%>%
  dplyr::mutate(nombre_region_05 = base::rowSums(dplyr::select(., starts_with("reg_05_"))))%>%
  #dplyr::summarise(min_mar=max(sus_ini_mod_mvv_mar[sus_ini_mod_mvv_mar<30]),min_oh=max(sus_ini_mod_mvv_oh[sus_ini_mod_mvv_oh<30]),min_pb=max(sus_ini_mod_mvv_pb[sus_ini_mod_mvv_pb<30]),min_coc=max(sus_ini_mod_mvv_coc[sus_ini_mod_mvv_coc<30]),min_otr=max(sus_ini_mod_mvv_otr[sus_ini_mod_mvv_otr<30]))
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_02>0~1,TRUE~0)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_09>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_15>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_03>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>%
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_11>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_08>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_04>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_10>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_14>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_12>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_07>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_13>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_16>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_06>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_01>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  dplyr::mutate(nombre_region_tot=dplyr::case_when(nombre_region_05>0~nombre_region_tot+1,TRUE~nombre_region_tot)) %>% 
  janitor::clean_names()
  
nombre_region_imputed_cat_reg<-  
    nombre_region_imputed %>%
        tidyr::pivot_longer(c(nombre_region_01, nombre_region_02, nombre_region_03, nombre_region_04, nombre_region_05, nombre_region_06, nombre_region_07, nombre_region_08, nombre_region_09, nombre_region_10, nombre_region_11, nombre_region_12, nombre_region_13, nombre_region_14, nombre_region_15), names_to = "cat_nombre_region", values_to = "count") %>%
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(nombre_region_imputed_max=max(count,na.rm=T)) %>% 
        dplyr::ungroup() %>% 
        dplyr::filter(nombre_region_imputed_max==count) %>% 
        dplyr::select(amelia_fit_imputations_imp1_row,cat_nombre_region,count) %>% 
        dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
        dplyr::mutate(n_row=n()) %>% 
        dplyr::ungroup() %>% 
        dplyr::mutate(cat_nombre_region=dplyr::case_when(n_row>1~NA_character_,
                                                        TRUE~cat_nombre_region)) %>% 
        dplyr::distinct(amelia_fit_imputations_imp1_row,.keep_all = T)
  
nombre_region_imputed<-
  nombre_region_imputed %>% 
    dplyr::left_join(nombre_region_imputed_cat_reg, by="amelia_fit_imputations_imp1_row") %>%
    dplyr::mutate(cat_nombre_region=dplyr::case_when(cat_nombre_region=="nombre_region_01"~"TarapacƔ (01)",cat_nombre_region=="nombre_region_02"~"Antofagasta (02)",cat_nombre_region=="nombre_region_03"~"Atacama (03)",cat_nombre_region=="nombre_region_04"~"Coquimbo (04)",cat_nombre_region=="nombre_region_05"~"Valparaƭso (05)",cat_nombre_region=="nombre_region_06"~"O'Higgins (06)",cat_nombre_region=="nombre_region_07"~"Maule (07)",cat_nombre_region=="nombre_region_08"~"Biobƭo (08)",cat_nombre_region=="nombre_region_09"~"Araucanƭa (09)",cat_nombre_region=="nombre_region_10"~"Los Lagos (10)",cat_nombre_region=="nombre_region_11"~"AysƩn (11)",cat_nombre_region=="nombre_region_12"~"Magallanes (12)",cat_nombre_region=="nombre_region_13"~"Metropolitana (13)",
                                                 cat_nombre_region=="nombre_region_14"~"Los Rƭos (14)",cat_nombre_region=="nombre_region_15"~"Arica (15)",cat_nombre_region=="nombre_region_16"~"Ƒuble (16)",
    ))%>% 
  janitor::clean_names()

#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_
tipo_centro_pub_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_centro_pub,
       amelia_fit$imputations$imp2$tipo_centro_pub,
       amelia_fit$imputations$imp3$tipo_centro_pub,
       amelia_fit$imputations$imp4$tipo_centro_pub,
       amelia_fit$imputations$imp5$tipo_centro_pub,
       amelia_fit$imputations$imp6$tipo_centro_pub,
       amelia_fit$imputations$imp7$tipo_centro_pub,
       amelia_fit$imputations$imp8$tipo_centro_pub,
       amelia_fit$imputations$imp9$tipo_centro_pub,
       amelia_fit$imputations$imp10$tipo_centro_pub,
       amelia_fit$imputations$imp11$tipo_centro_pub,
       amelia_fit$imputations$imp12$tipo_centro_pub,
       amelia_fit$imputations$imp13$tipo_centro_pub,
       amelia_fit$imputations$imp14$tipo_centro_pub,
       amelia_fit$imputations$imp15$tipo_centro_pub,
       amelia_fit$imputations$imp16$tipo_centro_pub,
       amelia_fit$imputations$imp17$tipo_centro_pub,
       amelia_fit$imputations$imp18$tipo_centro_pub,
       amelia_fit$imputations$imp19$tipo_centro_pub,
       amelia_fit$imputations$imp20$tipo_centro_pub,
       amelia_fit$imputations$imp21$tipo_centro_pub,
       amelia_fit$imputations$imp22$tipo_centro_pub,
       amelia_fit$imputations$imp23$tipo_centro_pub,
       amelia_fit$imputations$imp24$tipo_centro_pub,
       amelia_fit$imputations$imp25$tipo_centro_pub,
       amelia_fit$imputations$imp26$tipo_centro_pub,
       amelia_fit$imputations$imp27$tipo_centro_pub,
       amelia_fit$imputations$imp28$tipo_centro_pub,
       amelia_fit$imputations$imp29$tipo_centro_pub,
       amelia_fit$imputations$imp30$tipo_centro_pub
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::filter(value==TRUE) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(tipo_centro_pub_to_imputation=ifelse(n()>15,1,0))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss6<-
CONS_C1_df_dup_SEP_2020_match_miss5 %>% 
   dplyr::left_join(dplyr::select(nombre_region_imputed,amelia_fit_imputations_imp1_row,cat_nombre_region), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
    dplyr::mutate(nombre_region=factor(dplyr::case_when(is.na(nombre_region)~as.character(cat_nombre_region),TRUE~as.character(nombre_region)))) %>% 
  dplyr::left_join(dplyr::select(tipo_centro_pub_imputed,amelia_fit_imputations_imp1_row,tipo_centro_pub_to_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_centro_pub=factor(dplyr::case_when(is.na(tipo_centro_pub)~as.logical(tipo_centro_pub_to_imputation),TRUE~as.logical(tipo_centro_pub)))) %>%
  dplyr::select(-c(cat_est_conyugal,cat_nombre_region,tipo_centro_pub_to_imputation,tipo_centro_pub_to_imputation)) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


There were impossible to impute region of the center in 6 cases due to ties in the different imputed values. In case of public or private center, there were no missing values once imputed.


Diagnose of Drug Consumption

We looked over possible imputations to the diagnosis of drug consumption (n=1).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

dg_trs_cons_sus_or_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp2$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp3$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp4$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp5$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp6$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp7$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp8$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp9$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp10$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp11$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp12$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp13$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp14$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp15$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp16$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp17$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp18$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp19$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp20$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp21$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp22$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp23$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp24$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp25$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp26$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp27$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp28$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp29$dg_trs_cons_sus_or,
       amelia_fit$imputations$imp30$dg_trs_cons_sus_or
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::filter(value==TRUE) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(dg_trs_cons_imputation=ifelse(n()>15,1,0))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss7<-
CONS_C1_df_dup_SEP_2020_match_miss6 %>% 
    dplyr::left_join(dplyr::select(dg_trs_cons_sus_or_imputed,amelia_fit_imputations_imp1_row,dg_trs_cons_imputation), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(dg_trs_cons_sus_or=factor(dplyr::case_when(is.na(dg_trs_cons_sus_or)~as.logical(dg_trs_cons_imputation),TRUE~as.logical(dg_trs_cons_sus_or)))) %>%
  dplyr::select(-dg_trs_cons_imputation) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))


Cause of Discharge

We looked over possible imputations to the truly missing values, discarding missing values due to censorship (n=20).

motivo_de_egreso_a_imputar<-
CONS_C1_df_dup_SEP_2020_match_miss %>% dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_imp)) %>% dplyr::filter(!is.na(fech_egres_imp))%>%dplyr::select(row)

motivodeegreso_mod_imp_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp2$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp3$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp4$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp5$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp6$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp7$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp8$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp9$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp10$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp11$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp12$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp13$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp14$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp15$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp16$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp17$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp18$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp19$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp20$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp21$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp22$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp23$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp24$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp25$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp26$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp27$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp28$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp29$motivodeegreso_mod_imp,
       amelia_fit$imputations$imp30$motivodeegreso_mod_imp
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::filter(amelia_fit_imputations_imp1_row %in% unlist(motivo_de_egreso_a_imputar$row)) %>% 
  #FILTRAR CASOS QUE SON ILƓGICOS: MUERTES CON TRATAMIENTOS POSTERIORES (1)
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,dup, duplicates_filtered,evaluacindelprocesoteraputico,fech_ing_next_treat),by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
  dplyr::mutate(value_death=dplyr::case_when(value=="Death"& !is.na(fech_ing_next_treat)~1,TRUE~0)) %>% 
  dplyr::filter(value_death!=1) %>%  
  #:#:#:#:#:
  dplyr::count(amelia_fit_imputations_imp1_row,value) %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::slice_min(n, n = 1) %>% 
  dplyr::summarise(adm_dis=sum(value == "Administrative discharge",na.rm=T),
                    death=sum(value == "Death",na.rm=T),
                    referral=sum(value == "Referral to another treatment",na.rm=T),
                    ter_dis=sum(value == "Therapeutic discharge",na.rm=T),
                    dropout=sum(value =="Drop-out",na.rm=T)) %>% 
  rowwise() %>% 
  dplyr::mutate(ties=sum(c_across(adm_dis:dropout)),ties=ifelse(ties>1,1,0)) %>% 
  #dplyr::filter(ties==1) %>% 
  dplyr::ungroup() %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp, fech_egres_imp,fech_egres_num,dup, duplicates_filtered,evaluacindelprocesoteraputico,tipo_centro_derivacion),by=c("amelia_fit_imputations_imp1_row"="row")) %>% 
  dplyr::mutate(motivodeegreso_mod_imp_imputation= dplyr::case_when(
    ties==0 & adm_dis==1 & fech_egres_imp<"2019-11-13"~"Administrative discharge",
    #its an absorving state. should not have posterior treatments
    ties==0 & death==1 & fech_egres_imp<"2019-11-13" & dup==duplicates_filtered~"Death",
    ties==0 & referral==1 & fech_egres_imp<"2019-11-13"~"Referral to another treatment",
    ties==0 & ter_dis==1 & fech_egres_imp<"2019-11-13"~"Therapeutic discharge",
    ties==0 & dropout==1 & fech_egres_imp<"2019-11-13"~"Drop-out",
    #si no hay fecha de egreso, estÔ en la fecha de censura, sólo puede ser tratamiento en curso
    fech_egres_imp>="2019-11-13"~NA_character_,
    TRUE~NA_character_)) %>% 
    #si tiene evaluacindelprocesoteraputico, es porque no es un tratamiento en curso
  dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp")

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:
CONS_C1_df_dup_SEP_2020_match_miss8<-
CONS_C1_df_dup_SEP_2020_match_miss7 %>% 
   dplyr::left_join(motivodeegreso_mod_imp_imputed[,c("amelia_fit_imputations_imp1_row","motivodeegreso_mod_imp_original","fech_egres_imp","fech_egres_num","motivodeegreso_mod_imp_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
  #dplyr::filter(is.na(motivodeegreso_mod_imp)) %>% dplyr::select(row,hash_key,motivodeegreso_mod_imp_original, motivodeegreso_mod_imp_imputation,motivodeegreso_mod_imp,fech_egres_num,fech_egres_imp)
      dplyr::mutate(motivodeegreso_mod_imp=factor(dplyr::case_when(is.na(motivodeegreso_mod_imp)~motivodeegreso_mod_imp_imputation,
                                                                   motivodeegreso_mod_imp_original=="Ongoing treatment"~NA_character_, TRUE~as.character(motivodeegreso_mod_imp)))) %>% 
  dplyr::select(-motivodeegreso_mod_imp_imputation,-fech_egres_imp,-fech_egres_num,-motivodeegreso_mod_imp_original) %>% 
  #dplyr::rename_all( list(~paste0(., ".left"))) %>% 
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,motivodeegreso_mod_imp) %>% 
                     dplyr::rename("motivodeegreso_mod_imp_original"="motivodeegreso_mod_imp"),by="row") %>%
  data.table()

# CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp,motivodeegreso_mod_imp_original)
#CONS_C1_df_dup_SEP_2020_match_miss8 %>% janitor::tabyl(motivodeegreso_mod_imp_original)

#
if(
CONS_C1_df_dup_SEP_2020_match_miss8 %>% dplyr::filter(motivodeegreso_mod_imp_original!="Ongoing treatment",is.na(motivodeegreso_mod_imp)) %>% nrow()>0){"There are missing values on the cause of discharge"}


A total of 3 cases were not imputed due to ties in the imputed values.


Evaluation of the Therapeutic Process

Another variable that is worth imputing is the Evaluation of the Therapeutic Process at Discharge (n= 7,378). In case of ties, we selected the imputed values with the value with the minimum evaluation. Must consider that most of the null values could be explained by censoring or not completion of the treatment at the period of the study (n= 7,361).


# Ver distintos valores propuestos para sustancia de inciio
evaluacindelprocesoteraputico_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp2$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp3$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp4$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp5$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp6$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp7$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp8$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp9$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp10$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp11$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp12$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp13$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp14$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp15$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp16$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp17$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp18$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp19$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp20$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp21$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp22$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp23$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp24$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp25$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp26$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp27$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp28$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp29$evaluacindelprocesoteraputico,
       amelia_fit$imputations$imp30$evaluacindelprocesoteraputico
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::arrange(amelia_fit_imputations_imp1_row) %>% 
  dplyr::ungroup() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>%
  dplyr::summarise(high_ach_1=sum(value == "1-High Achievement",na.rm=T),
                   med_ach_2=sum(value == "2-Medium Achievement",na.rm=T),
                  min_ach_3=sum(value =="3-Minimum Achievement",na.rm=T)) %>% 
  dplyr::ungroup() %>% 
  dplyr::mutate(evaluacindelprocesoteraputico_imputation= dplyr::case_when(
      (high_ach_1 >med_ach_2) & (med_ach_2 >min_ach_3)~"1-High Achievement",
      (med_ach_2>high_ach_1) & (med_ach_2 >min_ach_3)~"2-Medium Achievement",
      (min_ach_3>med_ach_2) & (min_ach_3 >high_ach_1)~"3-Minimum Achievement"))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:
##
#CONS_C1_df_dup_SEP_2020 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico)

CONS_C1_df_dup_SEP_2020_match_miss9<-
CONS_C1_df_dup_SEP_2020_match_miss8 %>% 
   dplyr::left_join(evaluacindelprocesoteraputico_imputed[,c("amelia_fit_imputations_imp1_row","evaluacindelprocesoteraputico_imputation")], by=c("row"="amelia_fit_imputations_imp1_row")) %>%
    dplyr::mutate(evaluacindelprocesoteraputico=factor(dplyr::case_when(is.na(evaluacindelprocesoteraputico) & motivodeegreso_mod_imp %in% c("Drop-out","Administrative discharge","Therapeutic discharge","Referral to another treatment")~evaluacindelprocesoteraputico_imputation,
                                                                        is.na(motivodeegreso_mod_imp)~NA_character_,
                                                                        TRUE~as.character(evaluacindelprocesoteraputico)))) %>% 
     dplyr::mutate(evaluacindelprocesoteraputico=parse_factor(as.character(evaluacindelprocesoteraputico),levels=c('1-High Achievement', '2-Medium Achievement','3-Minimum Achievement'), ordered =T,trim_ws=T,include_na =F, locale=locale(encoding = "UTF-8"))) %>% 
  dplyr::select(-evaluacindelprocesoteraputico_imputation) %>% 
  data.table()

CONS_C1_df_dup_SEP_2020_match_miss9 %>% janitor::tabyl(motivodeegreso_mod_imp,evaluacindelprocesoteraputico) %>% 
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 2. Cause of Discharge vs. Evaluation of the Therapeutic Procress"),
               col.names = c("Cause of Discharge","1-High Achievement", "2- Medium Achievement","3- Minimum Achievement","Null Values"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 9) %>%
  kableExtra::scroll_box(width = "100%", height = "375px") 
Table 2. Cause of Discharge vs.Ā Evaluation of the Therapeutic Procress
Cause of Discharge 1-High Achievement 2- Medium Achievement 3- Minimum Achievement Null Values
Administrative discharge 867 4,427 4,488 0
Death 0 0 1 0
Drop-out 1,767 16,839 37,301 0
Referral to another treatment 1,298 5,835 4,705 0
Therapeutic discharge 17,120 6,135 1,118 1
NA 0 0 0 7,854


As seen in the table above, ongoing treatments did not have an evaluation process, which is logically valid, since their treatment competition was not captured.


Treatment Setting (Residential)

We looked over possible imputations to the treatment setting (n=97).


# Ver distintos valores propuestos para estado conyugal
#evaluacindelprocesoteraputico nombre_region tipo_centro_pub

tipo_de_plan_res_imputed<-
 cbind.data.frame(amelia_fit$imputations$imp1$row,
       amelia_fit$imputations$imp1$tipo_de_plan_res,
       amelia_fit$imputations$imp2$tipo_de_plan_res,
       amelia_fit$imputations$imp3$tipo_de_plan_res,
       amelia_fit$imputations$imp4$tipo_de_plan_res,
       amelia_fit$imputations$imp5$tipo_de_plan_res,
       amelia_fit$imputations$imp6$tipo_de_plan_res,
       amelia_fit$imputations$imp7$tipo_de_plan_res,
       amelia_fit$imputations$imp8$tipo_de_plan_res,
       amelia_fit$imputations$imp9$tipo_de_plan_res,
       amelia_fit$imputations$imp10$tipo_de_plan_res,
       amelia_fit$imputations$imp11$tipo_de_plan_res,
       amelia_fit$imputations$imp12$tipo_de_plan_res,
       amelia_fit$imputations$imp13$tipo_de_plan_res,
       amelia_fit$imputations$imp14$tipo_de_plan_res,
       amelia_fit$imputations$imp15$tipo_de_plan_res,
       amelia_fit$imputations$imp16$tipo_de_plan_res,
       amelia_fit$imputations$imp17$tipo_de_plan_res,
       amelia_fit$imputations$imp18$tipo_de_plan_res,
       amelia_fit$imputations$imp19$tipo_de_plan_res,
       amelia_fit$imputations$imp20$tipo_de_plan_res,
       amelia_fit$imputations$imp21$tipo_de_plan_res,
       amelia_fit$imputations$imp22$tipo_de_plan_res,
       amelia_fit$imputations$imp23$tipo_de_plan_res,
       amelia_fit$imputations$imp24$tipo_de_plan_res,
       amelia_fit$imputations$imp25$tipo_de_plan_res,
       amelia_fit$imputations$imp26$tipo_de_plan_res,
       amelia_fit$imputations$imp27$tipo_de_plan_res,
       amelia_fit$imputations$imp28$tipo_de_plan_res,
       amelia_fit$imputations$imp29$tipo_de_plan_res,
       amelia_fit$imputations$imp30$tipo_de_plan_res
       ) %>% 
  melt(id.vars="amelia_fit$imputations$imp1$row") %>% 
  janitor::clean_names() %>% 
  dplyr::group_by(amelia_fit_imputations_imp1_row) %>% 
  dplyr::summarise(n_res=sum(value=="1",na.rm=T),n_amb=sum(value=="0",na.rm=T))

#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:#:#:#::#:#:#:

CONS_C1_df_dup_SEP_2020_match_miss10<-
CONS_C1_df_dup_SEP_2020_match_miss9 %>% 
    dplyr::left_join(dplyr::select(tipo_de_plan_res_imputed,amelia_fit_imputations_imp1_row,n_res,n_amb), by=c("row"="amelia_fit_imputations_imp1_row")) %>% 
  dplyr::mutate(tipo_de_plan_res=factor(dplyr::case_when(is.na(tipo_de_plan_res)& (n_res>n_amb)~"1",is.na(tipo_de_plan_res)& (n_res<n_amb)~"0",TRUE~as.character(tipo_de_plan_res)))) %>%
  dplyr::select(-n_res,-n_amb) %>% 
  data.table()
#CONS_C1_df_dup_SEP_2020_match_miss6
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$tipo_centro_pub))
#table(is.na(CONS_C1_df_dup_SEP_2020_match_miss6$nombre_region))

As a result of the process of imputation, some values were not possible to impute (n=97).


Sample Characteristics

We checked the characteristics of the sample depending on type of treatment (Residential or Outpatients).


#prop.table(table(CONS_C1_df_dup_SEP_2020_match$abandono_temprano_rec,CONS_C1_df_dup_SEP_2020_match$tipo_de_plan_res),2)
match.on_tot <- c("row", "hash_key","sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","tipo_centro_pub","abandono_temprano_rec","evaluacindelprocesoteraputico","motivodeegreso_mod_imp","dg_trs_cons_sus_or","tipo_de_plan_res","sexo_2","edad_al_ing","fech_ing_num")
#$109,756
#aƱado los imputados
CONS_C1_df_dup_SEP_2020_match_miss_after_imp<-
CONS_C1_df_dup_SEP_2020_match_miss %>% 
  dplyr::select(-sus_ini_mod_mvv,-estado_conyugal_2,-escolaridad_rec,-freq_cons_sus_prin,-nombre_region,-tipo_centro_pub,-evaluacindelprocesoteraputico,-motivodeegreso_mod_imp,-dg_trs_cons_sus_or,-tipo_de_plan_res,-edad_ini_cons,-via_adm_sus_prin_act) %>% #
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020_match_miss10,
                                 row,
                                 sus_ini_mod_mvv,
                                 estado_conyugal_2,
                                 escolaridad_rec,
                                 freq_cons_sus_prin,
                                 nombre_region,
                                 tipo_centro_pub,
                                 evaluacindelprocesoteraputico,
                                 motivodeegreso_mod_imp,
                                 dg_trs_cons_sus_or,
                                 tipo_de_plan_res,
                                 edad_ini_cons,rn),by="row") %>% 
  dplyr::arrange(tipo_de_plan_res,hash_key,rn) %>% 
  #elimino esta variable porque es accesoria
  dplyr::select(-edad_ini_sus_prin) %>% 
  #para transformar el motivo de egreso
  dplyr::left_join(dplyr::select(CONS_C1_df_dup_SEP_2020,row,fech_egres_num,dias_treat_imp_sin_na),by="row") %>%
  #dplyr::filter(fech_egres_num==18213,!is.na(motivodeegreso_mod_imp)) %>% 
  dplyr::mutate(motivodeegreso_mod_imp=dplyr::case_when(dias_treat_imp_sin_na>=90 & motivodeegreso_mod_imp=="Drop-out"~ "Late Drop-out",
                                                        dias_treat_imp_sin_na<90 & motivodeegreso_mod_imp=="Drop-out"~ "Early Drop-out",
                                                        fech_egres_num==18213 & is.na(motivodeegreso_mod_imp)~"Ongoing treatment",
                                                        TRUE~as.character(motivodeegreso_mod_imp)
                                                        )) %>% #janitor::tabyl(motivodeegreso_mod_imp)
  dplyr::mutate(evaluacindelprocesoteraputico2=dplyr::case_when(fech_egres_num==18213 & is.na(evaluacindelprocesoteraputico)~"Ongoing treatment",
                                                        TRUE~as.character(evaluacindelprocesoteraputico)
  )) %>% 
  dplyr::mutate(sum_miss = base::rowSums(is.na(dplyr::select(.,c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","freq_cons_sus_prin","nombre_region","tipo_centro_pub","evaluacindelprocesoteraputico2","motivodeegreso_mod_imp","dg_trs_cons_sus_or","tipo_de_plan_res","edad_ini_cons","sexo_2","edad_al_ing","fech_ing_num"))))) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(sum_miss=sum(sum_miss)) %>% 
  dplyr::ungroup() 

CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>% 
  dplyr::filter(sum_miss>0)

CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp %>% 
  dplyr::filter(sum_miss==0) %>% 
  dplyr::select(-sum_miss) %>% 
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","condicion_ocupacional_corr")], by="row") %>% 
  dplyr::select(-evaluacindelprocesoteraputico2)

#  CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados[complete.cases(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados[,..match.on_tot]),..match.on_tot] 


Considering that some missing values were not able to imputation (due to ties in the candidate values for imputation or inconsistent values for imputations) (337, users=272), we ended the process having 109,419 complete cases (users=84,776).


kableone <- function(x, ...) {
  capture.output(x <- print(x,...))
  knitr::kable(x,format= "html", format.args= list(decimal.mark= ".", big.mark= ","))
}
match.on.sel<-c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","edad_ini_cons","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","nombre_region","dg_trs_cons_sus_or", "tipo_centro_pub","sexo_2","edad_al_ing","fech_ing_num","condicion_ocupacional_corr")
catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
#length(unique(CONS_C1_df_dup_SEP_2020_match$fech_ing_num))
#:#:#:#:#: DISMINUIR LA HETEROGENEIDAD DE LA FECHA DE INGRESO
# FORMAS DE CONSTREƑIR LA VARIABLE:
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-cut(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,100)
#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-CONS_C1_df_dup_SEP_2020_match_fech_ing_num
#CONS_C1_df_dup_SEP_2020_match_fech_ing_num<-CONS_C1_df_dup_SEP_2020_match$fech_ing_num
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num,0)))
#length(unique(round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)))

#CONS_C1_df_dup_SEP_2020_match$fech_ing_num<-round(CONS_C1_df_dup_SEP_2020_match$fech_ing_num/10,0)
#:#:#:#:#: 

paste0("Inconsistencies in dup vs. rn: ",CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados%>% 
         dplyr::filter(dup!=rn) %>% nrow())
## [1] "Inconsistencies in dup vs. rn: 0"
CONS_C1_df_dup_SEP_2020_match_not_miss2 <-
  CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(dup==1) %>% 
  dplyr::select(-rn,-dias_treat_imp_sin_na,-fech_egres_num)

attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$sus_ini_mod_mvv,"label")<-"Starting Substance"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$estado_conyugal_2,"label")<-"Marital Status"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$escolaridad_rec,"label")<-"Educational Attainment"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$edad_ini_cons,"label")<-"Age of Onset of Drug Use"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$freq_cons_sus_prin,"label")<-"Frequency of use of primary drug"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$nombre_region,"label")<-"Region of the Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_cie_10_rec,"label")<-"Psychiatric Comorbidity"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$dg_trs_cons_sus_or,"label")<-"Drug Dependence"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$evaluacindelprocesoteraputico,"label")<-"Evaluation of the Therapeutic Process"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$abandono_temprano_rec,"label")<-"Early Discharge"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,"label")<-"Residential"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_centro_pub,"label")<-"Public Center"
attr(CONS_C1_df_dup_SEP_2020_match_not_miss2$condicion_ocupacional_corr,"label")<-"Occupational Status"

pre_tab1<-Sys.time()
tab1<-
CreateTableOne(vars = match.on.sel, strata = "tipo_de_plan_res", 
                       data = CONS_C1_df_dup_SEP_2020_match_not_miss2, factorVars = catVars, smd=T)
post_tab1<-Sys.time()
diff_time_tab1=post_tab1-pre_tab1

kableone(tab1, 
         caption = paste0("Table 5. Covariate Balance in the Variables of Interest"),
         col.names= c("Variables","Ambulatory","Residential", "p-values","test","SMD"),
         nonnormal= c("edad_ini_cons","edad_al_ing","fech_ing_num"),#"\\hline",
                       smd=T, test=T, varLabels=T,noSpaces=T, printToggle=T, dropEqual=F) %>% 
    kableExtra::kable_styling(bootstrap_options = c("striped", "hover","condensed"),font_size= 10) %>%
  #()
  row_spec(1, bold = T, italic =T,color ="black",hline_after=T,extra_latex_after="\\arrayrulecolor{white}",font_size= 10) %>%
  #footnote(general = "Here is a general comments of the table. ",
  #        number = c("Footnote 1; ", "Footnote 2; "),
  #         alphabet = c("Footnote A; ", "Footnote B; "),
  #         symbol = c("Footnote Symbol 1; ", "Footnote Symbol 2")
  #         )%>%
  scroll_box(width = "100%", height = "400px") 
0 1 p test SMD
n 72083 12693
Starting Substance (%) <0.001 0.369
Alcohol 41410 (57.4) 5074 (40.0)
Cocaine hydrochloride 2925 (4.1) 513 (4.0)
Cocaine paste 7671 (10.6) 2235 (17.6)
Marijuana 18417 (25.5) 4554 (35.9)
Other 1660 (2.3) 317 (2.5)
Marital Status (%) <0.001 0.309
Married/Shared living arrangements 26166 (36.3) 2911 (22.9)
Separated/Divorced 7713 (10.7) 1318 (10.4)
Single 37340 (51.8) 8332 (65.6)
Widower 864 (1.2) 132 (1.0)
Educational Attainment (%) <0.001 0.124
3-Completed primary school or less 21857 (30.3) 4572 (36.0)
2-Completed high school or less 37209 (51.6) 6136 (48.3)
1-More than high school 13017 (18.1) 1985 (15.6)
Age of Onset of Drug Use (median [IQR]) 15.00 [14.00, 18.00] 15.00 [13.00, 17.00] <0.001 nonnorm 0.090
Frequency of use of primary drug (%) <0.001 0.767
1 day a week or more 5323 (7.4) 273 (2.2)
2 to 3 days a week 22322 (31.0) 1323 (10.4)
4 to 6 days a week 12223 (17.0) 1649 (13.0)
Daily 28265 (39.2) 9231 (72.7)
Did not use 1094 (1.5) 84 (0.7)
Less than 1 day a week 2856 (4.0) 133 (1.0)
Origen de Ingreso (Primera Entrada)/Motive of Admission to Treatment (First Entry) (%) <0.001 0.509
Spontaneous 33648 (46.7) 4270 (33.6)
Assisted Referral 4933 (6.8) 3004 (23.7)
Other 3753 (5.2) 738 (5.8)
Justice Sector 7134 (9.9) 813 (6.4)
Health Sector 22615 (31.4) 3868 (30.5)
Psychiatric Comorbidity (%) <0.001 0.317
Without psychiatric comorbidity 29015 (40.3) 3247 (25.6)
Diagnosis unknown (under study) 13270 (18.4) 2763 (21.8)
With psychiatric comorbidity 29798 (41.3) 6683 (52.7)
Region of the Center (%) <0.001 0.388
Antofagasta (02) 2291 (3.2) 697 (5.5)
AraucanĆ­a (09) 2221 (3.1) 162 (1.3)
Arica (15) 1315 (1.8) 728 (5.7)
Atacama (03) 1831 (2.5) 258 (2.0)
AysƩn (11) 797 (1.1) 42 (0.3)
BiobĆ­o (08) 5091 (7.1) 703 (5.5)
Coquimbo (04) 2798 (3.9) 268 (2.1)
Los Lagos (10) 2646 (3.7) 375 (3.0)
Los RĆ­os (14) 1113 (1.5) 185 (1.5)
Magallanes (12) 929 (1.3) 31 (0.2)
Maule (07) 4208 (5.8) 638 (5.0)
Metropolitana (13) 35961 (49.9) 6256 (49.3)
Ƒuble (16) 540 (0.7) 20 (0.2)
O’Higgins (06) 3638 (5.0) 567 (4.5)
TarapacĆ” (01) 1350 (1.9) 598 (4.7)
ValparaĆ­so (05) 5354 (7.4) 1165 (9.2)
Drug Dependence = TRUE (%) 50002 (69.4) 11645 (91.7) <0.001 0.589
Public Center = TRUE (%) 57121 (79.2) 3614 (28.5) <0.001 1.183
Sexo Usuario/Sex of User = Women (%) 17394 (24.1) 3937 (31.0) <0.001 0.155
Edad a la Fecha de Ingreso a Tratamiento (numƩrico continuo) (Primera Entrada)/Age at Admission to Treatment (First Entry) (median [IQR]) 34.43 [27.55, 43.46] 32.63 [26.34, 40.85] <0.001 nonnorm 0.185
Fecha de Ingreso a Tratamiento (NumƩrico)(c)/Date of Admission to Treatment (Numeric)(c) (median [IQR]) 16580.00 [15730.00, 17359.00] 16153.00 [15342.00, 17023.00] <0.001 nonnorm 0.293
Occupational Status (%) <0.001 1.025
Employed 39517 (54.8) 1771 (14.0)
Inactive 7674 (10.6) 1195 (9.4)
Looking for a job for the first time 172 (0.2) 20 (0.2)
No activity 2664 (3.7) 1820 (14.3)
Not seeking for work 492 (0.7) 335 (2.6)
Unemployed 21564 (29.9) 7552 (59.5)
#"tipo_de_plan_ambulatorio",
#https://cran.r-project.org/web/packages/tableone/vignettes/smd.html
#http://rstudio-pubs-static.s3.amazonaws.com/405765_2ce448f9bde24148a5f94c535a34b70e.html
#https://cran.r-project.org/web/packages/tableone/vignettes/introduction.html
#https://cran.r-project.org/web/packages/tableone/tableone.pdf
#https://www.rdocumentation.org/packages/tableone/versions/0.12.0/topics/CreateTableOne

## Construct a table 
#standardized mean differences of greater than 0.1


We checked the similarity in the samples using other measures, such as the variance ratio of the samples and Kolmogorov-Smirnov(KS) statistics.


library(cobalt)

bal2<-bal.tab(CONS_C1_df_dup_SEP_2020_match_not_miss2[,match.on.sel], treat = CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res,
         thresholds = c(m = .1, v = 2),
         binary = "std", 
         continuous = "std",
         stats = c("mean.diffs", "variance.ratios","ks.statistics"))
#"mean.diffs", "variance.ratios","ks.statistics","ovl.coefficient"

options(knitr.kable.NA = '')

bal2$Balance[,2]<-round(bal2$Balance[,2],2)
bal2$Balance[,4]<-round(bal2$Balance[,4],2)
bal2$Balance[,6]<-round(bal2$Balance[,6],2)

var_names<- 
    list("origen_ingreso_mod_Spontaneous"="Motive Admission-Spontaneous",
         "origen_ingreso_mod_Assisted Referral"= "Motive Admission-Assisted Referral",
         "origen_ingreso_mod_Other"="Motive Admission-Other",
         "origen_ingreso_mod_Justice Sector"= "Motive Admission-Justice Sector",
         "origen_ingreso_mod_Health Sector"="Motive Admission-Health Sector",
         "dg_cie_10_rec_Without psychiatric comorbidity"="ICD-10-Wo/Psych Comorbidity",
         "dg_cie_10_rec_Diagnosis unknown (under study)"="ICD-10-Dg. Unknown/under study",
         "dg_cie_10_rec_With psychiatric comorbidity"="ICD-10-W/Psych Comorbidity",
         "sexo_2_Women"="Sex-Women",
         "edad_al_ing"="Age at Admission",
         "fech_ing_num"="Date of Admission",
         "duplicates_filtered"="Treatments (#)",
         "more_one_treat"=">1 treatment",
         "sus_ini_mod_mvv_Alcohol"= "Starting Substance-Alcohol",
         "sus_ini_mod_mvv_Cocaine hydrochloride"= "Starting Substance-Cocaine hydrochloride",
         "sus_ini_mod_mvv_Cocaine paste"="Starting Substance-Cocaine paste",
         "sus_ini_mod_mvv_Marijuana"="Starting Substance-Marijuana",
         "sus_ini_mod_mvv_Other"="Starting Substance-Other",
         "estado_conyugal_2_Married/Shared living arrangements"="Marital Status-Married/Shared liv. arr.",
         "condicion_ocupacional_corr_Employed"="Occ.Status-Employed",
         "condicion_ocupacional_corr_Inactive"="Occ.Status-Inactive",
         "condicion_ocupacional_corr_Looking for a job for the first time"="Occ.Status-Looking 1st job",
         "condicion_ocupacional_corr_No activity"="Occ.Status- No activity",
         "condicion_ocupacional_corr_Not seeking for work"="Occ.Status- Not seeking work",
         "condicion_ocupacional_corr_Unemployed"="Occ.Status- Unemployed",
         "estado_conyugal_2_Separated/Divorced"="Marital Status-Separated/Divorced",
         "estado_conyugal_2_Single"= "Marital Status-Single",
         "estado_conyugal_2_Widower"="Marital Status-Widower",
         "escolaridad_rec_3-Completed primary school or less"="Educational Attainment-PS or less",
         "escolaridad_rec_2-Completed high school or less"="Educational Attainment-HS or less",
         "escolaridad_rec_1-More than high school"="Educational Attainment-More than HS",
         "freq_cons_sus_prin_1 day a week or more"="Freq Drug Cons-1d/wk or more",
         "freq_cons_sus_prin_2 to 3 days a week"="Freq Drug Cons-2-3d/wk",
         "freq_cons_sus_prin_4 to 6 days a week"="Freq Drug Cons-4-6d/wk",
         "freq_cons_sus_prin_Daily"="Freq Drug Cons-Daily",
         "freq_cons_sus_prin_Did not use"="Freq Drug Cons-Did not use",
         "freq_cons_sus_prin_Less than 1 day a week"="Freq Drug Cons-Less 1d/wk",
         "nombre_region_Antofagasta (02)"="Region-Antofagasta(02)",
         "nombre_region_AraucanĆ­a (09)"="Region-AraucanĆ­a(09)",
         "nombre_region_Arica (15)"="Region-Arica(15)",
         "nombre_region_Atacama (03)"="Region-Atacama(03)",
         "nombre_region_AysƩn (11)"="Region-AysƩn(11)",
         "nombre_region_BiobĆ­o (08)"="Region- BiobĆ­o(08)",
         "nombre_region_Coquimbo (04)"="Region-Coquimbo(04)",
         "nombre_region_Los Lagos (10)"="Region-Los Lagos(10)",
         "nombre_region_Los RĆ­os (14)"="Region-Los RĆ­os(14)",
         "nombre_region_Magallanes (12)"="Region-Magallanes(12)",
         "nombre_region_Maule (07)"="Region-Maule(07)",
         "nombre_region_Metropolitana (13)"="Region-Metropolitana(13)",
         "nombre_region_Ƒuble (16)"="Region-Ƒuble(16)",
         "nombre_region_O'Higgins (06)"="Region-O'Higgins(06)",
         "nombre_region_TarapacĆ” (01)"="Region-TarapacĆ”(01)",
         "nombre_region_ValparaĆ­so (05)"="Region-ValparaĆ­so(05)",
         "tipo_centro_pub"="Public Center",
         "dg_trs_cons_sus_or"= "Drug Dependence",
         "edad_ini_cons"="Age of Onset of Drug Use",
         "rn"="Treatment")

var.names<-data.table(data.frame(unlist(var_names)),keep.rownames = T) %>% janitor::clean_names()

balance_prev<-
data.table::data.table(bal2$Balance[,1:6],keep.rownames = T) %>%
  dplyr::arrange(-abs(Diff.Un)) %>% 
  dplyr::left_join(var.names,by="rn") %>% 
  dplyr::select(unlist_var_names,everything()) %>% 
  dplyr::select(-rn) 

balance_prev %>% #data.table::data.table(keep.rownames = F)
    knitr::kable(.,format = "html", format.args = list(decimal.mark = ".", big.mark = ","),
               caption = paste0("Table 4. Covariate Balance in the Variables of Interest"),
               col.names = c("Variables","Nature of Variables", "Unadjusted SMDs","Threshold","Unadjusted Variance Ratios","Threshold","Unadjusted KS"),
               align =rep('c', 101)) %>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size = 10) %>%
  kableExtra::add_footnote( c(paste("Note. ")), 
                            notation = "none") %>%
  kableExtra::scroll_box(width = "100%", height = "375px")
Table 4. Covariate Balance in the Variables of Interest
Variables Nature of Variables Unadjusted SMDs Threshold Unadjusted Variance Ratios Threshold Unadjusted KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51
Occ.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41
Freq Drug Cons-Daily Binary 0.72 Not Balanced, >0.1 0.34
Occ.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30
Drug Dependence Binary 0.59 Not Balanced, >0.1 0.22
Freq Drug Cons-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.21
Motive Admission-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17
Occ.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11
Starting Substance-Alcohol Binary -0.36 Not Balanced, >0.1 0.17
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13
Date of Admission Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14
Motive Admission-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13
Freq Drug Cons-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05
Starting Substance-Marijuana Binary 0.23 Not Balanced, >0.1 0.10
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04
Starting Substance-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07
Freq Drug Cons-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03
Age at Admission Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07
Region-TarapacĆ”(01) Binary 0.16 Not Balanced, >0.1 0.03
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07
Occ.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02
Motive Admission-Justice Sector Binary -0.13 Not Balanced, >0.1 0.03
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06
Region-AraucanĆ­a(09) Binary -0.12 Not Balanced, >0.1 0.02
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01
Freq Drug Cons-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02
Age of Onset of Drug Use Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07
Region-AysƩn(11) Binary -0.09 Balanced, <0.1 0.01
Region-Ƒuble(16) Binary -0.09 Balanced, <0.1 0.01
Freq Drug Cons-Did not use Binary -0.08 Balanced, <0.1 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03
Educational Attainment-HS or less Binary -0.07 Balanced, <0.1 0.03
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02
Region- BiobĆ­o(08) Binary -0.06 Balanced, <0.1 0.02
Region-ValparaĆ­so(05) Binary 0.06 Balanced, <0.1 0.02
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01
Region-Maule(07) Binary -0.04 Balanced, <0.1 0.01
Occ.Status-Inactive Binary -0.04 Balanced, <0.1 0.01
Motive Admission-Other Binary 0.03 Balanced, <0.1 0.01
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.01
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00
Motive Admission-Health Sector Binary -0.02 Balanced, <0.1 0.01
Occ.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00
Starting Substance-Other Binary 0.01 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00
Region-Los RĆ­os(14) Binary -0.01 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01
Starting Substance-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00
Note.


We generated a plot to focus on unbalanced data.


Figure 8. Covariates Balance on Different Values

Figure 8. Covariates Balance on Different Values

Specification

First, we had to discretize categorical variables into logical parameters, and for continuous covariates, we divide them into 20 equal parts.


catVars<-
c("sus_ini_mod_mvv","estado_conyugal_2","escolaridad_rec","tipo_centro_pub","freq_cons_sus_prin","origen_ingreso_mod","dg_cie_10_rec","dg_trs_cons_sus_or","nombre_region","tipo_de_plan_res","sexo_2","condicion_ocupacional_corr")
columna_dummy <- function(df, columna) {
  df %>% 
  mutate_at(columna, ~paste(columna, eval(as.symbol(columna)), sep = "_")) %>% 
    mutate(valor = 1) %>% 
    spread(key = columna, value = valor, fill = 0)
}

quantiles = function(covar, n_q) {
    p_q = seq(0, 1, 1/n_q)
    val_q = quantile(covar, probs = p_q, na.rm = TRUE)
    covar_out = rep(NA, length(covar))
    for (i in 1:n_q) {
        if (i==1) {covar_out[covar<val_q[i+1]] = i}
        if (i>1 & i<n_q) {covar_out[covar>=val_q[i] & covar<val_q[i+1]] = i}
        if (i==n_q) {covar_out[covar>=val_q[i] & covar<=val_q[i+1]] = i}}
    covar_out
}

CONS_C1_df_dup_SEP_2020_match_not_miss3<-CONS_C1_df_dup_SEP_2020_match_not_miss2
for (i in c(1:length(catVars))){#catVars[-10] excluding treatment indicator
  cat<-as.character(catVars[i])#catVars[-10] excluding treatment indicator
  CONS_C1_df_dup_SEP_2020_match_not_miss3<-columna_dummy(CONS_C1_df_dup_SEP_2020_match_not_miss3,cat)
}
CONS_C1_df_dup_SEP_2020_match_not_miss3$tipo_de_plan_res_FALSE<-NULL
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_ini_cons,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$edad_al_ing,20)
CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num<-quantiles(CONS_C1_df_dup_SEP_2020_match_not_miss3$fech_ing_num,20)
match.on.sel2<-names(CONS_C1_df_dup_SEP_2020_match_not_miss3)[-c(1,2,5)]
#"edad_ini_cons","edad_al_ing","fech_ing_num")

CONS_SEP_match = data.table::data.table(CONS_C1_df_dup_SEP_2020_match_not_miss2[order(CONS_C1_df_dup_SEP_2020_match_not_miss2$tipo_de_plan_res, decreasing = TRUE), ])

CONS_SEP_match_dum = data.table::data.table(CONS_C1_df_dup_SEP_2020_match_not_miss3 %>% dplyr::arrange(factor(row, levels = CONS_SEP_match$row)))


Match

The matched variables were defined for the treatments at baseline (n=84,776).


library(designmatch)

#fine = list(covs = fine_covs)
#solver = list(name = name, t_max = t_max, approximate = 1, round_cplex = 0, trace_cplex = 0).
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
# 1. Gurobi installation

#For an exact solution, we strongly recommend running designmatch either with CPLEX or Gurobi.  Between these two solvers, the R interface of Gurobi is considerably easier to install.  Here we provide general instructions for manually installing Gurobi and its R interface in Mac and Windows machines.

#1. Create a free academic license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/creating_a_new_academic_li.html

#2. Install the software
#   2.1. In http://www.gurobi.com/index, go to Downloads > Gurobi Software
#   2.2. Choose your operating system and press download
#
#3. Retrieve and set up your Gurobi license
#   2.1. Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_and_setting_up_.html
#   2.2. Then follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/retrieving_a_free_academic.html
#
#4. Test your license
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/testing_your_license.html
#
#5. Install the R interface of Gurobi   
#   Follow the instructions in: http://www.gurobi.com/documentation/7.0/quickstart_windows/r_installing_the_r_package.html
#   * In Windows, in R run the command install.packages("PATH\\gurobi_7.X-Y.zip", repos=NULL) where path leads to the file gurobi_7.X-Y.zip (for example PATH=C:\\gurobi702\\win64\\R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#   * In MAC, in R run the command install.packages('PATH/gurobi_7.X-Y.tgz', repos=NULL) where path leads to the file gurobi_7.X-Y.tgz (for example PATH=/Library/gurobi702/mac64/R; note that the path may be different in your computer), and "7.X-Y" refers to the version you are installing.
#       
#6. Test the installation 
#   Load the library and run the examples therein
#   * A possible error that you may get is the following: "Error: package ā€˜slam’ required by ā€˜gurobi’ could not be found". If that case, install.packages('slam') and try again.
#   You should be all set!
CONS_SEP_match$tipo_de_plan_res<-ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)

#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:#:#:#:#:#:#:#:#:#:#:#:#:#:#:##:
require(slam)
# Solver options
#default solver is glpk with approximate = 1
#For an exact solution, we strongly recommend using cplex or gurobi as they are much faster than the other solvers, but they do require a license (free for academics, but not for people outside universities)
t_max = 60*6
solver = "gurobi" #cplex, glpk, gurobi and symphony
solver = list(name = solver, 
  t_max = t_max, #t_max is a scalar with the maximum time limit for finding the matches.within this time limit, a partial, suboptimal solution is given
  approximate = 1,#. If approximate = 1 (the default), an approximate solution is found via a relaxation of the original integer program. #FEB2021: I dont want to violate some balancing constraints to some extent. Change to 0.
  round_cplex = 0, 
  trace = 1#turns the optimizer output on
  )

#Indicador de tratamiento
t_ind= ifelse(CONS_SEP_match$tipo_de_plan_res=="1",1,0)

#table(is.na(CONS_SEP_match$tipo_de_plan_res))

# Moment balance: constrain differences in means to be at most 0.1 standard deviations apart
#:#:#:#:#:#:#:#:#:#:#:#:#:
#######mom_covs is a matrix where each column is a covariate whose mean is to be balanced
#######mom_tols is a vector of tolerances for the maximum difference in means for the covariates in mom_covs
#######mom_targets is a vector of target moments (e.g., means) of a distribution to be approximated by matched sampling. is optional, but if #######mom_covs is specified then mom_tols needs to be specified too
#######The lengths of mom_tols and mom_target have to be equal to the number of columns of mom_covs
mom_covs = cbind(CONS_SEP_match$edad_al_ing,
                 CONS_SEP_match$fech_ing_num,
                 CONS_SEP_match$edad_ini_cons)
mom_tols = absstddif(mom_covs, t_ind, .15)# original, 0.05, ahora probarƩ con 0.7
mom = list(covs = mom_covs, tols = mom_tols, targets = NULL)

# Mean balance
covs = cbind(CONS_SEP_match$edad_al_ing,
                 CONS_SEP_match$fech_ing_num,
                 CONS_SEP_match$edad_ini_cons)
meantab(covs, t_ind)
##      Mis      Min      Max   Mean T   Mean C Std Dif P-val
## [1,]   0    14.88    88.84    35.99    35.99       0     1
## [2,]   0 13621.00 18199.00 16445.49 16445.49       0     1
## [3,]   0     5.00    74.00    16.51    16.51       0     1
# Fine balance
#is a matrix where each column is a nominal covariate for fine balance
fine_covs = cbind(CONS_SEP_match$origen_ingreso_mod,
                  CONS_SEP_match$dg_cie_10_rec,
                  CONS_SEP_match$sexo_2,
                  CONS_SEP_match$sus_ini_mod_mvv,
                  CONS_SEP_match$tipo_centro_pub, #cuidado
                  CONS_SEP_match$estado_conyugal_2, 
                  CONS_SEP_match$escolaridad_rec,
                  CONS_SEP_match$freq_cons_sus_prin,
                  CONS_SEP_match$nombre_region,
                  CONS_SEP_match$condicion_ocupacional_corr,
                  #d_match_no_duplicates$evaluacindelprocesoteraputico,
                  CONS_SEP_match$dg_trs_cons_sus_or
)
fine = list(covs = fine_covs)

# 11,448; No. of controls: 11,448"
# 11,452; No. of controls: 11,452"
# 11,459; No. of controls: 11,459" #when I changed tolerance from .0999 to .1999
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#MATCH
start.time <- Sys.time()
set.seed(2125)
out = cardmatch(t_ind, #ES NECESARIO QUE LOS TRATAMIENTOS ESTEN ORDENADOS Y LOS OTROS VECTORES TAMBIƋN 
                mom = mom,# ya los definĆ­ list(covs = mom_covs, tols = mom_tols, targets = mom_targets), 
          fine = fine, 
          solver = solver)
##   Building the matching problem... 
##   Gurobi optimizer is open... 
##   Finding the optimal matches... 
## Gurobi Optimizer version 9.1.1 build v9.1.1rc0 (win64)
## Thread count: 8 physical cores, 8 logical processors, using up to 8 threads
## Optimize a model with 60 rows, 84776 columns and 1441192 nonzeros
## Model fingerprint: 0xf9a678f3
## Variable types: 0 continuous, 84776 integer (84776 binary)
## Coefficient statistics:
##   Matrix range     [1e+00, 2e+04]
##   Objective range  [1e+00, 1e+00]
##   Bounds range     [0e+00, 0e+00]
##   RHS range        [0e+00, 0e+00]
## Found heuristic solution: objective -0.0000000
## Presolve time: 1.61s
## Presolved: 60 rows, 84776 columns, 1440986 nonzeros
## Variable types: 0 continuous, 84776 integer (84776 binary)
## 
## Root relaxation: objective 1.145804e+04, 389 iterations, 0.65 seconds
## 
##     Nodes    |    Current Node    |     Objective Bounds      |     Work
##  Expl Unexpl |  Obj  Depth IntInf | Incumbent    BestBd   Gap | It/Node Time
## 
##      0     0 11458.0374    0   31   -0.00000 11458.0374      -     -    2s
## H    0     0                    1772.0000000 11458.0374   547%     -    6s
##      0     0 11458.0374    0   31 1772.00000 11458.0374   547%     -    7s
## H    0     0                    11458.000000 11458.0374  0.00%     -    8s
##      0     0 11458.0374    0   31 11458.0000 11458.0374  0.00%     -    8s
## 
## Explored 1 nodes (389 simplex iterations) in 8.10 seconds
## Thread count was 8 (of 8 available processors)
## 
## Solution count 3: 11458 1772 -0 
## 
## Optimal solution found (tolerance 1.00e-04)
## Best objective 1.145800000000e+04, best bound 1.145800000000e+04, gap 0.0000%
##   Optimal matches found
#FEB2021= If I change to bmatch, error can't allocate vector size 3.4gb
end.time <- Sys.time()
time.taken <- end.time - start.time
# Fine balance (note here we are getting an approximate solution)
#for (i in 1:ncol(fine_covs)) {     
#   print(finetab(fine_covs[, i], t_id_1, c_id_1))
#}
# Indices of the treated units and matched controls
t_id_1 = out$t_id  
c_id_1 = out$c_id   
group = out$group_id    
ids_matched<-cbind.data.frame(t_id_1, c_id_1,group)

paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
## [1] "No. of treatments: 11,458; No. of controls: 11,458"
# Fine balance (note here we are getting an approximate solution)
finetab_match1<-data.frame()
for (i in 1:ncol(fine_covs)) {      
    #finetab_match1<- rbind.data.frame(
  finetab(fine_covs[, i], t_id_1, c_id_1)
}

d_match = CONS_SEP_match[c(t_id_1, c_id_1), ]

paste0("Number of duplicated rows: ",d_match %>%  dplyr::group_by(row) %>%  dplyr::mutate(n_row=n()) %>% dplyr::ungroup() %>% dplyr::filter(n_row>1) %>% nrow())
## [1] "Number of duplicated rows: 0"
paste0("Percentage of the selected treatments: ",scales::percent(length(t_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==1) %>% nrow()))
## [1] "Percentage of the selected treatments: 90%"
paste0("Percentage of the selected controls: ",
       scales::percent(length(c_id_1)/CONS_SEP_match %>% dplyr::filter(tipo_de_plan_res==0) %>% nrow()))
## [1] "Percentage of the selected controls: 16%"
#cuidado, el anterior me encontró mÔs del mismo control para un tratado
#por eso ocuparƩ el de mƔs abajo.
#EL DE A CONTINUACIƓN ES ERRƓNEO PORQUE ES POR POSICIƓN, NO POR COINCIDENCIA DEL NƚMERO CON LA FILA
#d_match_no_duplicates = CONS_SEP_match[which(CONS_SEP_match$row %in% c(t_id_1, c_id_1)), ]


Explore Results of the Matching


Age at Admission

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Age of Onset of Drug Use

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Date of Admission

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample

Figure 9. Empirical Cumulative Distribution Functions on the Matched Sample


Love plot

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 10. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


Balance

Table 5. Covariate Balance in the Variables of Interest
Unadjusted
Adjusted
Variables Nature of Variables SMDs Threshold Variance Ratios Threshold KS SMDs Threshold Variance Ratios Threshold KS
Public Center Binary -1.18 Not Balanced, >0.1 0.51 0.00 Balanced, <0.1 0.00
Occ.Status-Employed Binary -0.95 Not Balanced, >0.1 0.41 0.00 Balanced, <0.1 0.00
Freq Drug Cons-Daily Binary 0.72 Not Balanced, >0.1 0.34 0.00 Balanced, <0.1 0.00
Occ.Status- Unemployed Binary 0.62 Not Balanced, >0.1 0.30 0.00 Balanced, <0.1 0.00
Drug Dependence Binary 0.59 Not Balanced, >0.1 0.22 0.00 Balanced, <0.1 0.00
Freq Drug Cons-2-3d/wk Binary -0.52 Not Balanced, >0.1 0.21 0.00 Balanced, <0.1 0.00
Motive Admission-Assisted Referral Binary 0.48 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
Occ.Status- No activity Binary 0.38 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
Starting Substance-Alcohol Binary -0.36 Not Balanced, >0.1 0.17 0.00 Balanced, <0.1 0.00
>1 treatment Binary 0.33 Not Balanced, >0.1 0.14 0.23 Not Balanced, >0.1 0.10
ICD-10-Wo/Psych Comorbidity Binary -0.32 Not Balanced, >0.1 0.15 0.00 Balanced, <0.1 0.00
Treatments (#) Contin. 0.31 Not Balanced, >0.1 1.91 Balanced, <2 0.14 0.21 Not Balanced, >0.1 1.46 Balanced, <2 0.10
Marital Status-Married/Shared liv. arr. Binary -0.30 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Date of Admission Contin. -0.29 Not Balanced, >0.1 1.00 Balanced, <2 0.14 -0.15 Not Balanced, >0.1 0.93 Balanced, <2 0.08
Marital Status-Single Binary 0.28 Not Balanced, >0.1 0.14 0.00 Balanced, <0.1 0.00
Motive Admission-Spontaneous Binary -0.27 Not Balanced, >0.1 0.13 0.00 Balanced, <0.1 0.00
Freq Drug Cons-1d/wk or more Binary -0.25 Not Balanced, >0.1 0.05 0.00 Balanced, <0.1 0.00
ICD-10-W/Psych Comorbidity Binary 0.23 Not Balanced, >0.1 0.11 0.00 Balanced, <0.1 0.00
Starting Substance-Marijuana Binary 0.23 Not Balanced, >0.1 0.10 0.00 Balanced, <0.1 0.00
Region-Arica(15) Binary 0.21 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Starting Substance-Cocaine paste Binary 0.20 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Age at Admission Contin. -0.19 Not Balanced, >0.1 0.84 Balanced, <2 0.07 0.06 Balanced, <0.1 0.98 Balanced, <2 0.04
Freq Drug Cons-Less 1d/wk Binary -0.19 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Region-TarapacĆ”(01) Binary 0.16 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Sex-Women Binary 0.15 Not Balanced, >0.1 0.07 0.00 Balanced, <0.1 0.00
Occ.Status- Not seeking work Binary 0.15 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Motive Admission-Justice Sector Binary -0.13 Not Balanced, >0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-PS or less Binary 0.12 Not Balanced, >0.1 0.06 0.00 Balanced, <0.1 0.00
Region-AraucanĆ­a(09) Binary -0.12 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Magallanes(12) Binary -0.12 Not Balanced, >0.1 0.01 0.00 Balanced, <0.1 0.00
Freq Drug Cons-4-6d/wk Binary -0.11 Not Balanced, >0.1 0.04 0.00 Balanced, <0.1 0.00
Region-Antofagasta(02) Binary 0.11 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Coquimbo(04) Binary -0.10 Not Balanced, >0.1 0.02 0.00 Balanced, <0.1 0.00
Region-AysƩn(11) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Ƒuble(16) Binary -0.09 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Age of Onset of Drug Use Contin. -0.09 Balanced, <0.1 0.91 Balanced, <2 0.07 0.00 Balanced, <0.1 1.01 Balanced, <2 0.01
ICD-10-Dg. Unknown/under study Binary 0.08 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Freq Drug Cons-Did not use Binary -0.08 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Educational Attainment-HS or less Binary -0.07 Balanced, <0.1 0.03 0.00 Balanced, <0.1 0.00
Educational Attainment-More than HS Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region- BiobĆ­o(08) Binary -0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-ValparaĆ­so(05) Binary 0.06 Balanced, <0.1 0.02 0.00 Balanced, <0.1 0.00
Region-Los Lagos(10) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Maule(07) Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Occ.Status-Inactive Binary -0.04 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Motive Admission-Other Binary 0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-Atacama(03) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Region-O’Higgins(06) Binary -0.03 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Motive Admission-Health Sector Binary -0.02 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Marital Status-Widower Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Occ.Status-Looking 1st job Binary -0.02 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Starting Substance-Other Binary 0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Marital Status-Separated/Divorced Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Los RĆ­os(14) Binary -0.01 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Region-Metropolitana(13) Binary -0.01 Balanced, <0.1 0.01 0.00 Balanced, <0.1 0.00
Starting Substance-Cocaine hydrochloride Binary 0.00 Balanced, <0.1 0.00 0.00 Balanced, <0.1 0.00
Note. Unadjusted (n=84,776) ; Adjusted (n=22,916) ; Total pairs: 11,458


Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample

Figure 13. Love plot of the Matched Sample in Covariates v/s Unmatched Sample


We allowed to tolerate fech_ing_num (SMD=0.16), because the date of admission not necessarily had to be strictly balanced, assuming that not every user had to be admitted to treatment in exact dates.

Survival Setting

Bivariate

irrs<-function(x, y="event", z="person_days",db){
  #x= variable que agrupa
  #y= evento explicado
  #z= person days
  #db= base de datos
  fmla <- as.formula(paste0(y,"~",x))
  fmla2 <- as.formula(paste0(z,"~",x))
assign(paste0("irr_",y,"_por_",x),
       rateratio.test::rateratio.test(
     x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
     n=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)]
    )
   )
return(
  rateratio.test::rateratio.test(
     x=as.numeric(xtabs(fmla, data=get(db)))[c(2,1)],
     n=as.numeric(xtabs(fmla2, data=get(db)))[c(2,1)]
      )
    )
}

#CONS_C1_df_dup_SEP_2020%>% 
#  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% 
#  janitor::tabyl(condicion_ocupacional_corr)

# d_match %>% 
    #dplyr::group_by(row) %>% dplyr::mutate(rn_row=row_number()) %>% janitor::tabyl(rn_row)
#22,914

#
#d_match_surv %>% janitor::tabyl(duplicates_filtered,event)
#nrow(ids_matched)/2 =11,457

#CONS_SEP_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)

ids_matched_filter<-
ids_matched %>% 
    dplyr::group_by(t_id_1) %>% 
    dplyr::mutate(rn_id=row_number()) %>% 
    dplyr::ungroup() %>% 
    dplyr::filter(rn_id==1)

ids_matched_rows<-cbind.data.frame("row_t"=CONS_SEP_match[c(t_id_1),"row"],
                        t_id_1,
                        "row_c"=CONS_SEP_match[c(c_id_1),"row"],
                        c_id_1) %>% 
  janitor::clean_names() %>% 
  dplyr::left_join(subset(ids_matched_filter,select=-c_id_1),by="t_id_1")

CONS_C1_df_dup_SEP_2020_irrs_health<-  
d_match %>% 
  dplyr::left_join(CONS_C1_df_dup_SEP_2020[c("row","dias_treat_imp_sin_na", "event", "person_days", "person_years","diff_bet_treat")],by="row") %>%
  dplyr::left_join(ids_matched_rows, by=c("row")) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,NA)) %>% 
  dplyr::select(-rn_id,-group) %>% #glimpse()
  dplyr::rename("row_c"="row_2") %>% 
  dplyr::left_join(ids_matched_rows, by=c("row"="row_2")) %>% 
  dplyr::mutate(t_id_1=ifelse(!is.na(t_id_1.x),t_id_1.x,t_id_1.y)) %>% 
  dplyr::mutate(c_id_1=ifelse(!is.na(c_id_1.x),c_id_1.x,c_id_1.y)) %>% 
  dplyr::mutate(row_c=ifelse(!is.na(row_c),row_c,row.y)) %>% 
  dplyr::mutate(group_match=ifelse(!is.na(group),group,group_match)) %>% 

  dplyr::select(-t_id_1.x,-c_id_1.x,-t_id_1.y,-c_id_1.y,-group,-row.y,-rn_id) %>% #glimpse()
  
  dplyr::mutate(res_drop_out=dplyr::case_when(
  tipo_de_plan_res==1 & abandono_temprano_rec==TRUE ~1,
  TRUE~0)) %>% 
  dplyr::mutate(min_ach=dplyr::case_when(
  evaluacindelprocesoteraputico=="3-Minimum Achievement" ~1,
  TRUE~0)) %>% 
  dplyr::mutate(res_drop_out=factor(res_drop_out)) %>% 
    dplyr::mutate(min_ach=factor(min_ach)) %>% 
  dplyr::mutate(status_censorship=dplyr::case_when(
  motivodeegreso_mod_imp=="Ongoing treatmentt" ~1,
  TRUE~0)) 
  
# CONS_C1_df_dup_SEP_2020_irrs_health%>% janitor::tabyl(cnt_diagnostico_trs_fisico_irr)
#label(CONS_C1_df_dup_SEP_2020_prev4_explore$dg_fis_anemia) <- "Physical Dg. Anemia"
#   cnt_mod_cie_10_or cnt_otros_probl_at_sm_or

#22,914
#d_match %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

#27 Y ALGO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% nrow()

# HAY UN SEGUNDO TRATAMIENTO PARA 4,565 CASOS
#PARA VER SI HAY MAS DE UN CASO POR USUARIO
#CONS_C1_df_dup_SEP_2020_irrs_health %>% dplyr::group_by(hash_key) %>% dplyr::mutate(rn_hash=row_number()) %>% dplyr::ungroup() %>% janitor::tabyl(rn_hash)

#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_

irrs_early_drop<-irrs(x="abandono_temprano_rec" ,db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_res_plan<-irrs(x="tipo_de_plan_res" ,db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_res_early<-irrs(x="res_drop_out" ,db="CONS_C1_df_dup_SEP_2020_irrs_health")
irrs_min_ach<-irrs(x="min_ach" ,db="CONS_C1_df_dup_SEP_2020_irrs_health")


The incidence rate of readmission was 1.62 (95% IC 1.52-1.73) in users that had at least an early dropout, compared with users that did not have a physical condition at baseline (p = 0.000).


Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Ealy Dropout of Treatment at Baseline

Figure 12. Cum. Hazards to Experience Readmission to SUD Treatment, by Ealy Dropout of Treatment at Baseline


The incidence rate of readmission was 1.24 (95% IC 1.18-1.3) in users that had a residential plan, compared with users that had an ambulatory plan at baseline (p = 0.000).


Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Type of Plan at Baseline

Figure 13. Cum. Hazards to Experience Readmission to SUD Treatment, by Type of Plan at Baseline


The incidence rate of readmission was 1.66 (95% IC 1.54-1.79) in users that had a residential plan and an early dropout, compared with the rest of users at baseline (p = 0.000).


Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout

Figure 14. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person in a Residential Treatment with an Early Dropout


The incidence rate of readmission was 1.44 (95% IC 1.37-1.52) in users that had a minimum achievement of the therapeutic goals, compared with the rest of users at baseline (p = 0.000).


Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person had a Minimum Achievement in Therapeutic Goals

Figure 15. Cum. Hazards to Experience Readmission to SUD Treatment, whether it was a person had a Minimum Achievement in Therapeutic Goals


Multivariate


Inference for the regression coefficients is based on a within-pair treatment effect.


#The stratified Cox model can be used to perform Cox regression on matched designs by using stratification but it can also be done by modeling with frailties

#Some believe that accounting for the matching isn't necessary at all, since it doesn't affect beta coefficients materially and the variables which you have matched on can simply be adjusted for as covariates in the model; this is sufficient in most cases.

#A matched cohort study involves pairs (or clusters in case several untreated subjects are matched with each of the treated individuals) formed to include individuals who differ with respect to treatment but may be matched on certain baseline characteristics.

# Two common methods for analyzing paired/clustered survival data involve a stratified and a marginal Cox model, which represent 2 different approaches of accounting for potential correlation between paired outcomes (for discussion see Glidden and Vittinghoff [5]).

#A regression model is often a more powerful tool in detecting treatment effect than a matched study.

#Choices in study design are regression modeling or matched-pairs study.

#Brazauskas, R., & Logan, B. R. (2016). Observational Studies: Matching or Regression? Biology of Blood and Marrow Transplantation, 22(3), 557–563. doi:10.1016/j.bbmt.2015.12.005 



#simple expression of the common HR estimator would be a useful summary of exposure effect

#Shinozaki, T., Mansournia, M. A., & Matsuyama, Y. (2017). On hazard ratio estimators by proportional hazards models in matched-pair cohort studies. Emerging themes in epidemiology, 14, 6. https://doi.org/10.1186/s12982-017-0060-8

# "The covariate effects are so odd that we'll never model them correctly, so treat each combination as unique."The data set two needs to have each treated subject + their controls in a separate stratum - Terry Therneau

#Stratified approach
#For each pair, there is an unspecified baseline hazard function. The partial likelihood idea is readily adapted by multiplying the partial likelihoods specific to each stratum.
##Pros: Lack of structure. Cons: It does not provide any information about heterogeneity between pairs; Pairs in which both members shared the same covariate information or which provide only censoring observations do not contribute to the likelihood; this is because no between-pair comparisons are attempted. Heterogeneity is not described by a single parameter as frailty;

# Austin PC. A critical appraisal of propensity-score matching in the medical literature between 1996 and 2003. STATISTICS IN MEDICINE. Statist. Med. 2008; 27:2037–2049

#https://www.duo.uio.no/bitstream/handle/10852/10289/stat-res-11-97.pdf?sequence=1&isAllowed=y

memory.limit(size = 20000)
## [1] 32565
#Classical stratified tests

#This statistics reduces to the difference in the number of events in the 2 samples which occurr while both patients in the pair are at risk given the appropiate weight. 

#Klein, J. & Moeschberger, M. (2003) Survival Analysis: Statistical Methods for Censored and Truncated Data. 2nd Edition. Springer-Verlag. 

#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
m1 <- coxph(Surv(diff_bet_treat,event) ~ strata(group_match) + tipo_de_plan_res, data = CONS_C1_df_dup_SEP_2020_irrs_health)

summary(m1)
## Call:
## coxph(formula = Surv(diff_bet_treat, event) ~ strata(group_match) + 
##     tipo_de_plan_res, data = CONS_C1_df_dup_SEP_2020_irrs_health)
## 
##   n= 6398, number of events= 6398 
##    (16518 observations deleted due to missingness)
## 
##                     coef exp(coef) se(coef)     z Pr(>|z|)    
## tipo_de_plan_res 0.34731   1.41525  0.06943 5.002 5.67e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                  exp(coef) exp(-coef) lower .95 upper .95
## tipo_de_plan_res     1.415     0.7066     1.235     1.622
## 
## Concordance= 0.586  (se = 0.024 )
## Likelihood ratio test= 25.4  on 1 df,   p=5e-07
## Wald test            = 25.02  on 1 df,   p=6e-07
## Score (logrank) test = 25.27  on 1 df,   p=5e-07
cox.zph(m1)#Possibly, a log-normal or log-logistic AFT model would fit better than Cox.
##                  chisq df     p
## tipo_de_plan_res  9.52  1 0.002
## GLOBAL            9.52  1 0.002
m1b <- try_with_time_limit(
            survreg(Surv(diff_bet_treat+1,event)~ strata(group_match)+ tipo_de_plan_res,data=CONS_C1_df_dup_SEP_2020_irrs_health, dist="weibull"),
        elapsed = 60)
        
#The survreg function in R does not allow time = 0. This is because for several of the distributions, including the lognormal distribution, having events occur at time = 0 will result in an undefined estimator.
(m1b)

m2 <- eval_fork(
        coxph(Surv(diff_bet_treat,event) ~ frailty(group_match, 
          distribution = "gaussian", sparse = FALSE, method = "reml") + tipo_de_plan_res, 
          data = CONS_C1_df_dup_SEP_2020_irrs_health),
      timeout = 60)
summary(m2)

cox.zph(m2)
#CONS_C1_df_dup_SEP_2020$condicion_ocupacional_corr CONS_C1_df_dup_SEP_2020$cnt_diagnostico_trs_fisico CONS_C1_df_dup_SEP_2020$tenencia_de_la_vivienda_mod

##COx Diagnostics
#ggcoxzph(cox.zph(m1))
#ggcoxdiagnostics(m1, type = "dfbeta",
#                 linear.predictions = FALSE, ggtheme = theme_bw())
#ggcoxdiagnostics(m1, type = "deviance",
#                 linear.predictions = FALSE, ggtheme = theme_bw())
#It’s also possible to check outliers by visualizing the deviance residuals. The deviance residual is a normalized transform of the martingale residual. These residuals should be roughtly symmetrically distributed about zero with a standard deviation of 1.
#Positive values correspond to individuals that ā€œdied too soonā€ compared to expected survival times.
#Negative values correspond to individual that ā€œlived too longā€.
#Very large or small values are outliers, which are poorly predicted by the model.

#grid.arrange(
#  ggforest(m1, data=CONS_C1_df_dup_SEP_2020_irrs_health),
#  ggforest(m2, data=CONS_C1_df_dup_SEP_2020_irrs_health),
#  ncol=2
#)


There was evidence of unproportional hazards. Users in residential treatments experience 42% within the study period than users in outpatient treatments (95% CI: 24% - 62%; p=0).


Multistate


#  dplyr::filter(motivodeegreso_mod_imp!="En curso")%>% #Sacar los tratamientos que estƩn en curso 


tab1_lab<- paste0('Original C1 Dataset \n(n = ', formatC(nrow(CONS_C1), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1%>% dplyr::distinct(HASH_KEY)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab2_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab1_5_lab<- paste0('&#8226; Duplicated entries\\l &#8226; Overlapping treatments of users\\l &#8226; Intermediate events of treatment (continuous referrals)')
tab4_lab<- paste0('Imputed C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab3_5_lab<- paste0('C1 Dataset \n(n = ', formatC(nrow(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_descartados%>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')
tab6_lab<- paste0('C1 Matched Sample\nin Treatment Setting \n(n = ', formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% nrow(), format='f', big.mark=',', digits=0), ';\nusers: ',formatC(CONS_C1_df_dup_SEP_2020_match_miss_after_imp_conservados %>% 
  dplyr::filter(hash_key %in% unlist(unique(d_match$hash_key))) %>% dplyr::distinct(hash_key)%>% nrow(), format='f', big.mark=',', digits=0),')')

lab_tab<- paste0("  Result of the matching on treatment setting\nNo. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))

#https://stackoverflow.com/questions/46750364/diagrammer-and-graphviz
#https://mikeyharper.uk/flowcharts-in-r-using-diagrammer/
#http://blog.nguyenvq.com/blog/2012/05/29/better-decision-tree-graphics-for-rpart-via-party-and-partykit/
#http://blog.nguyenvq.com/blog/2014/01/17/skeleton-to-create-fast-automatic-tree-diagrams-using-r-and-graphviz/
#https://cran.r-project.org/web/packages/DiagrammeR/vignettes/graphviz-mermaid.html
#https://stackoverflow.com/questions/39133058/how-to-use-graphviz-graphs-in-diagrammer-for-r
#https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781789802566/1/ch01lvl1sec21/creating-diagrams-via-the-diagrammer-package
#https://justlegal.be/2019/05/using-flowcharts-to-display-legal-procedures/
# paste0("No. of treatments: ",table(table(t_id_1)) %>% formatC(big.mark = ","),"; No. of controls: ",table(table(c_id_1))%>% formatC(big.mark = ","))
#
library(DiagrammeR) #⋉
grViz("digraph flowchart {
      # node definitions with substituted label text
      node [fontname = Times, shape = rectangle,fontsize = 9]        
      tab1 [label = '@@1']
      tab2 [label = '@@2']
      tab3 [label = '&#8226;Duplicated entries\\l&#8226;Intermediate events of treatment (continuous referrals)\\l',fontsize = 7]
      tab4 [label = '@@4']
      blank [label = '', width = 0.0001, height = 0.0001]
      blank2 [label = '', width = 0.0001, height = 0.0001]
      blank3 [label = '', width = 0.0001, height = 0.0001]
      tab5 [label = '&#8226;Logically Inconsistent candidates for imputation\\l&#8226;Ties in candidates for imputation\\l',fontsize = 7]
      tab6 [label= '@@6']
      tab7 [label = '&#8226;Matching pairs based on balance of covariates at basline,\\l&#8226;Pairs 1:1\\l',fontsize = 7]
      
      # edge definitions with the node IDs
      tab1 -> blank [arrowhead = none,label='  Data wrangling and normalization process',fontsize = 8];
      blank -> tab3
      blank -> tab2
      tab2 -> blank2 [arrowhead = none];
      blank2 -> tab5 
      blank2 -> tab4 [label='  Result of the imputation of missing values',fontsize = 8];
      tab4 -> blank3 [arrowhead= none];
      blank3-> tab7
      blank3 -> tab6 [label='@@7',fontsize = 8];
            subgraph {
              rank = same; tab3; blank;
            }
            subgraph {
              rank = same; tab5; blank2;
            }
            subgraph {
              rank = same; tab7; blank3;
            }
      }

      [1]:  tab1_lab
      [2]:  tab2_lab
      [3]:  tab1_5_lab
      [4]:  tab4_lab
      [5]:  ''
      [6]:  tab6_lab
      [7]:  lab_tab
      ")
#      {rank=same; 'tab2'' -> tab3 [label='',fontsize = 11]}; #⋉
#CONS_C1_df_dup_SEP_2020_irrs_health
Table 6. Summary descriptives table
Variables Ambulatory Residential Sig.
N=17154 N=15122
Motive of Admission to Treatment (First Entry): <0.001
Spontaneous 6994 (40.8%) 5567 (36.8%)
Assisted Referral 2940 (17.1%) 3087 (20.4%)
Other 954 (5.56%) 919 (6.08%)
Justice Sector 1229 (7.16%) 985 (6.51%)
Health Sector 5037 (29.4%) 4564 (30.2%)
Psychiatric Comorbidity: <0.001
Without psychiatric comorbidity 4719 (27.5%) 3845 (25.4%)
Diagnosis unknown (under study) 3510 (20.5%) 3305 (21.9%)
With psychiatric comorbidity 8925 (52.0%) 7972 (52.7%)
Sexo Usuario/Sex of User: 0.069
Men 11433 (66.6%) 10224 (67.6%)
Women 5721 (33.4%) 4898 (32.4%)
Age at Admission to Treatment 32.7 [26.7;40.7] 33.0 [26.9;41.0] 0.045
Treatment Length (>90): <0.001
FALSE 14035 (81.8%) 12088 (79.9%)
TRUE 3119 (18.2%) 3028 (20.0%)
ā€˜Missing’ 0 (0.00%) 6 (0.04%)
Treatments by User (#): 0.006
1 8857 (51.6%) 7661 (50.7%)
2 4666 (27.2%) 4108 (27.2%)
3 2172 (12.7%) 1920 (12.7%)
4 924 (5.39%) 840 (5.55%)
5 316 (1.84%) 354 (2.34%)
6 157 (0.92%) 155 (1.02%)
7 44 (0.26%) 54 (0.36%)
8 18 (0.10%) 30 (0.20%)
More than one treatment: 0.084
0 8857 (51.6%) 7661 (50.7%)
1 8297 (48.4%) 7461 (49.3%)
Starting Substance: <0.001
Alcohol 7450 (43.4%) 6204 (41.0%)
Cocaine hydrochloride 746 (4.35%) 637 (4.21%)
Cocaine paste 2498 (14.6%) 2377 (15.7%)
Marijuana 6036 (35.2%) 5521 (36.5%)
Other 424 (2.47%) 383 (2.53%)
Marital Status: <0.001
Married/Shared living arrangements 4275 (24.9%) 3473 (23.0%)
Separated/Divorced 1874 (10.9%) 1565 (10.3%)
Single 10819 (63.1%) 9936 (65.7%)
Widower 186 (1.08%) 148 (0.98%)
Educational Attainment: 0.006
3-Completed primary school or less 5239 (30.5%) 4843 (32.0%)
2-Completed high school or less 8912 (52.0%) 7775 (51.4%)
1-More than high school 3003 (17.5%) 2504 (16.6%)
Frequency of use of primary drug: <0.001
1 day a week or more 592 (3.45%) 327 (2.16%)
2 to 3 days a week 2504 (14.6%) 1570 (10.4%)
4 to 6 days a week 2447 (14.3%) 1972 (13.0%)
Daily 10879 (63.4%) 10993 (72.7%)
Did not use 340 (1.98%) 106 (0.70%)
Less than 1 day a week 392 (2.29%) 154 (1.02%)
Public Center: <0.001
FALSE 9904 (57.7%) 10750 (71.1%)
TRUE 7250 (42.3%) 4372 (28.9%)
Minimum Achievement in the Therapeutic Process: <0.001
Ongoing treatment 1164 (6.79%) 660 (4.36%)
Minimum achievement 8436 (49.2%) 6190 (40.9%)
High/Medium achievement 7554 (44.0%) 8272 (54.7%)
Drug Dependence: <0.001
FALSE 2077 (12.1%) 1316 (8.70%)
TRUE 15077 (87.9%) 13806 (91.3%)
Age of Onset of Drug Use 15.0 [14.0;17.0] 15.0 [13.0;17.0] 0.031
Occupational Status: <0.001
Employed 3816 (22.2%) 2011 (13.3%)
Inactive 1881 (11.0%) 1539 (10.2%)
Looking for a job for the first time 32 (0.19%) 23 (0.15%)
No activity 1858 (10.8%) 2134 (14.1%)
Not seeking for work 350 (2.04%) 400 (2.65%)
Unemployed 9217 (53.7%) 9015 (59.6%)
Days of Treatment (missing dates of discharge were replaced with difference from 2019-11-13) 153 [84.0;276] 151 [66.0;277] <0.001
Users with Posterior Treatments (=1): 0.084
0 8857 (51.6%) 7661 (50.7%)
1 8297 (48.4%) 7461 (49.3%)
User’s Days available in the system for the study 408 [146;1175] 401 [152;1093] 0.018
User’s Years available in the system for the study 1.12 [0.40;3.22] 1.10 [0.42;2.99] 0.018
Days of difference between the Next Treatment 347 [137;780] 263 [72.0;692] <0.001
Treatment Successful Completion: <0.001
Ongoing treatment 1164 (6.79%) 660 (4.36%)
Completion 3150 (18.4%) 4372 (28.9%)
Non-completion 12840 (74.9%) 10090 (66.7%)
Early Drop-out & Residential Plan (=1): 0.000
0 17154 (100%) 12094 (80.0%)
1 0 (0.00%) 3028 (20.0%)
Cause of Discharge: <0.001
Administrative discharge 1475 (8.60%) 1902 (12.6%)
Early Drop-out 3119 (18.2%) 3028 (20.0%)
Late Drop-out 6047 (35.3%) 2976 (19.7%)
Ongoing treatment 1164 (6.79%) 660 (4.36%)
Referral to another treatment 2199 (12.8%) 2184 (14.4%)
Therapeutic discharge 3150 (18.4%) 4372 (28.9%)
Note. Variables of C1 dataset had to be standardized before comparison;
Continuous variables are presented as Medians and Percentiles 25 and 75 were shown;
Categorical variables are presented as number (%)


After matching, we selected 32,276 treatments (users=22,916).


#NOT DUPLICATES
#d_match_surv %>% 
#    dplyr::group_by(hash_key) %>% 
#    dplyr::mutate(dis_hash=n_distinct(n_hash)) %>% 
#    dplyr::ungroup() %>% 
#    dplyr::filter(dis_hash>1)

set.seed(2125)
random__users <- 
  d_match_surv %>% 
  dplyr::group_by(n_hash) %>% 
  summarise() %>% 
  sample_n(1000)

d_match_surv_sub_sample<-
d_match_surv %>% 
  dplyr::filter(n_hash %in% unlist(random__users))

library("frailtySurv")
memory.limit(size = 20000)
#frailty_1<-
#fitfrail(Surv(dias_treat_imp_sin_na,event)~ tipo_de_plan_res+ cluster(hash_key),d_match_surv,frailty="gamma")

m3 <-  R.utils::withTimeout(
  coxph(Surv(dias_treat_imp_sin_na,event) ~ tipo_de_plan_res + frailty(hash_key, 
    distribution = "gaussian", sparse = FALSE, method = "reml"), data = d_match_surv),
   timeout = 6000)
summary(m3)

#cox.zph(m3)#Possibly, a log-normal or log-logistic AFT model would fit better than Cox.


library("mstate")
vector_surv_msm<- c("tr_completion","diff_bet_treat","dias_treat_imp_sin_na","min_achievement","abandono_temprano_rec","tipo_de_plan_res")
# dias_treat_imp_sin_na 

#NECESITO CREAR UNA BASE DE DATOS CON LA ENTRADA A UN TRATAMIENTO COMO UN EVENTO INDEPENDIENTE, EN UNA FILA SEPARADA DEL TƉRMINO DE TRATAMIENTO EN LONG.

d_match_surv %>% 
    dplyr::select(c("hash_key","dup","fech_ing_num",vector_surv_msm)) %>% 
    dplyr::arrange(hash_key,fech_ing_num) %>% #glimpse()
    dplyr::group_by(hash_key) %>% 
    #genero los aƱos en el estudio para cada tratamiento
    dplyr::mutate(years_1=(fech_ing_num-min(fech_ing_num))/365.25) %>% 
    #genero un segundo valor con el de los aƱos en estudio
    dplyr::mutate(years_2=dias_treat_imp_sin_na/365.25+years_1) %>% 
    dplyr::ungroup() %>% #glimpse()
    tidyr::pivot_longer(
     cols = starts_with("years_"),
     names_to = "events_within_tr",
     names_prefix = "years_",
     values_to = "years_in_study",
     values_drop_na = TRUE
    ) %>% 
  dplyr::select(-fech_ing_num,-diff_bet_treat) %>% 
  #Eventos entre tratamientos
  dplyr::mutate(events_within_tr=as.numeric(events_within_tr)) %>% 
  dplyr::group_by(hash_key) %>% 
  dplyr::mutate(event_num=row_number()) %>% 
  dplyr::ungroup() %>%
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  dplyr::mutate(state=dplyr::case_when(
    #1)entrar a tratamiento residencial -Enter to a Residential Treatment (transient)
    events_within_tr==1 & tipo_de_plan_res==1~1,
    #2)completar tratamiento residencial -Complete a Residential Treatment (transient)
    tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==1~2,
    #3)no completar tratamiento residencial -Not Complete a Residential Treatment (transient)
    tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==1~3,
    #4)entrar a tratamiento ambulatorio -Enter to an Outpatient Treatment (transient)
    events_within_tr==1 & tipo_de_plan_res==0~4,
    #5)completar tratamiento ambulatorio -Complete an Outpatient Treatment (transient)
    tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==0~5,
    #6)no-completar tratamiento ambulatorio -Not Complete an Outpatient Treatment (transient)
    tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==0~6,
    #7)Censored Residential
    grepl("Ongoing",tr_completion) & tipo_de_plan_res==1 & events_within_tr==2~7,
    #7)Censored Outpatient
    grepl("Ongoing",tr_completion) & tipo_de_plan_res==0 & events_within_tr==2~8
    #tr_completion=="Ongoing treatment" & events_within_tr==2 & tipo_de_plan_res==0~9,
    #tr_completion=="Ongoing treatment" & events_within_tr==2 & tipo_de_plan_res==1~10
    )) %>% 
    dplyr::rename("PTNUM"="hash_key") %>% 
  #filtrar los que no han terminado su tratmaiento- no deberĆ­a filtrarlos, porque quito el tiempo absorvido
  #dplyr::mutate(censored=dplyr::case_when(tr_completion=="Ongoing treatment" & events_within_tr==2~1,TRUE~0)) %>% 
  #dplyr::filter(censored==0) %>% 
  #janitor::tabyl(min_achievement,tr_completion)
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  #interval censoring. Continuous events should be differenced with the previous year in study with the minimum (0.001)
  #check if ordered
  dplyr::group_by(PTNUM) %>% 
  dplyr::mutate(diff_bet_events=years_in_study-lag(years_in_study)) %>% 
  dplyr::ungroup() %>% 
  #there are some values that were generated that still have minimum and arbitrary differences between events. We converted into 0.
  dplyr::mutate(diff_bet_events=dplyr::case_when(diff_bet_events<=.00001~0,
                                                 TRUE~diff_bet_events),
                years_in_study_corr=dplyr::case_when(diff_bet_events<=0~years_in_study+.00001,
                                                     TRUE~years_in_study)) %>% 
  dplyr::mutate(diff_bet_events=dplyr::case_when(diff_bet_events==0~.00001,
                                                 TRUE~diff_bet_events)) %>% 
  assign("d_match_surv_msm",.,envir=.GlobalEnv)


attr(d_match_surv_msm$years_in_study,"label") <- "Years in study"
attr(d_match_surv_msm$years_in_study_corr,"label") <- "Years in study (corrected for interval censored events)"
attr(d_match_surv_msm$events_within_tr,"label") <- "Years in study"
attr(d_match_surv_msm$event_num,"label") <- "Order of events (includes events at admission and at discharge, separately)"
attr(d_match_surv_msm$state,"label") <- "State (1 to 7)"

  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  # 284 cases that changed once imputed the null differences with the posterior event in time
paste0(d_match_surv_msm %>% 
    dplyr::filter(years_in_study!=years_in_study_corr) %>% nrow()," cases that changed once imputed the null differences with the posterior event in time (were corrected)")
## [1] "284 cases that changed once imputed the null differences with the posterior event in time (were corrected)"
  # 0 cases with negative differences with the posterior event in time (not possible)
paste0(d_match_surv_msm %>% 
    dplyr::filter(diff_bet_events<0) %>% nrow()," cases with negative differences with the posterior event in time (not possible)")
## [1] "0 cases with negative differences with the posterior event in time (not possible)"
  #_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
  # Export wide format
d_match_surv_msm %>% 
  dplyr::select(PTNUM,event_num,state,years_in_study_corr,min_achievement) %>% 
  dplyr::mutate_at(vars(c("state","min_achievement","years_in_study_corr")),~as.character(.)) %>% 
    tidyr::pivot_longer(
     cols = -c("PTNUM","event_num"),
     names_to = "varswide",
     values_drop_na = F
    ) %>% 
    tidyr::pivot_wider(
                       names_from=c(varswide,event_num),
                       names_glue ="{varswide}_{event_num}",
                       values_from=value
                       ) %>% 
  dplyr::mutate_at(vars(starts_with("state_")),~as.numeric(.)) %>% 
  dplyr::mutate_at(vars(starts_with("years_in_study_corr_")),~as.numeric(.)) %>% 
assign("d_match_surv_msm_wide",.,envir=.GlobalEnv)

for (i in c(1:16)){  
    attr(d_match_surv_msm_wide[,paste0("state_",i)],"label") <-paste0("State (1 to 7) at s=",i)
    
        attr(d_match_surv_msm_wide[,paste0("years_in_study_corr_",i)],"label") <-paste0("Years in study (corrected for interval censored events) at s=",i)
    
    attr(d_match_surv_msm_wide[,paste0("min_achievement_",i)],"label") <-paste0("Achievement in the Therapeutic Process at s=",i)

}

rio::export(d_match_surv_msm_wide,"G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/d_match_surv_msm_wide.dta")
## Error in write_dta_(data, normalizePath(path, mustWork = FALSE), version = stata_file_format(version), : Failed to open 'G:\Mi unidad\Alvacast\SISTRAT 2019 (github)\d_match_surv_msm_wide.dta' for writing
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_#_
#Q matrix
#_#_#_#_#_#_
#_#_#_#_#_#_
library(msm)
#If an instantaneous transition is not allowed from state  to state , then Q should have  entry 0, otherwise it should be non-zero. The diagonal entries are ignored.
#entries define transition/movement probabilities of subjects through states
#probability of being in state j at time t + s, conditional on being in state i at time s
# in time homogeneous intensities the dependence of P on time will be omitted ---> no debiese ser mi caso
# each row must sum to one for an example row 1 summation equals to 1
# transition intensity matrix Q
 qmatrix<-matrix(statetable.msm(state, PTNUM, data = d_match_surv_msm),nrow=6,ncol=8)

 model1<- 
msm::statetable.msm(state, PTNUM, data = d_match_surv_msm)
model1%>% 
    data.table::data.table() %>% 
    dplyr::filter(N>0) %>% 
    dplyr::arrange(desc(N)) %>% 
    dplyr::mutate("%"=scales::percent(round(N/sum(N),1))) %>% 
    dplyr::mutate(from=dplyr::case_when(from==1~"Enter to a Residential Treatment(1)",
                                      from==2~"Complete a Residential Treatment(2)",
                                      from==3~"Not Complete a Residential Treatment(3)",
                                      from==4~"Enter to an Outpatient Treatment(4)",
                                      from==5~"Complete an Outpatient Treatment(5)",
                                      from==6~"Not Complete an Outpatient Treatment(6)",
                                      from==7~"Ongoing treatment (Residential)(Censored)(7)",
                                      from==8~"Ongoing treatment (Outpatient)(Censored)(8)")) %>% 
    dplyr::mutate(to=dplyr::case_when(to==1~"Enter to a Residential Treatment(1)",
                                      to==2~"Complete a Residential Treatment(2)",
                                      to==3~"Not Complete a Residential Treatment(3)",
                                      to==4~"Enter to an Outpatient Treatment(4)",
                                      to==5~"Complete an Outpatient Treatment(5)",
                                      to==6~"Not Complete an Outpatient Treatment(6)",
                                      to==7~"Ongoing treatment(Residential)(Censored)(7)",
                                      to==8~"Ongoing treatment(Outpatient)(Censored)(8)")) %>%   
  knitr::kable(format= "html", format.args= list(decimal.mark= ".", big.mark= ","),
               caption="Table 7. State Transition Matrix",
               align= c("c",rep('c', 5)))%>%
  kableExtra::kable_styling(bootstrap_options = c("striped", "hover"),font_size= 11) %>% 
  kableExtra::add_footnote("Note= Excluded transitions that were not found")
Table 7. State Transition Matrix
from to N %
Enter to an Outpatient Treatment(4) Not Complete an Outpatient Treatment(6) 12,840 30%
Enter to a Residential Treatment(1) Not Complete a Residential Treatment(3) 10,090 20%
Enter to a Residential Treatment(1) Complete a Residential Treatment(2) 4,372 10%
Enter to an Outpatient Treatment(4) Complete an Outpatient Treatment(5) 3,150 10%
Not Complete an Outpatient Treatment(6) Enter to an Outpatient Treatment(4) 2,435 10%
Not Complete a Residential Treatment(3) Enter to an Outpatient Treatment(4) 1,978 0%
Not Complete a Residential Treatment(3) Enter to a Residential Treatment(1) 1,701 0%
Enter to an Outpatient Treatment(4) Ongoing treatment(Outpatient)(Censored)(8) 1,164 0%
Not Complete an Outpatient Treatment(6) Enter to a Residential Treatment(1) 1,091 0%
Complete a Residential Treatment(2) Enter to an Outpatient Treatment(4) 800 0%
Complete a Residential Treatment(2) Enter to a Residential Treatment(1) 704 0%
Enter to a Residential Treatment(1) Ongoing treatment(Residential)(Censored)(7) 660 0%
Complete an Outpatient Treatment(5) Enter to an Outpatient Treatment(4) 483 0%
Complete an Outpatient Treatment(5) Enter to a Residential Treatment(1) 168 0%
a Note= Excluded transitions that were not found


We restricted the transitions following several criteria: cannot enter/complete/non-complete treatment two sequential times (e.g.Ā 1 -> 4), or enter to a residential/outpatient treatment and complete an outpatient/residential (e.g.Ā 4->2).


Figure 18. Markov Influence Diagram of possible pathways of a user

Figure 18. Markov Influence Diagram of possible pathways of a user


#msm2Surv(d_match_surv_msm, "PTNUM", "years_in_study_corr", "state", "min_achievement", Q=qmatrix)
 #Error in rowSums(Qf) : 'x' must be an array of at least two dimensions

#
#All other off-diagonal positions contain an initial value for the corresponding transition intensity.
##this matrix should usually only allow transitions between adjacent states
msm_mat_cens_allowed <- matrix(c(
                          0,1,1,0,0,0,
                          1,0,0,1,0,0,
                          1,0,0,1,0,0,
                          0,0,0,0,1,1,
                          1,0,0,1,0,0,
                          1,0,0,1,0,0
                          ), nrow=6, ncol=6,
                          byrow=TRUE,
                          dimnames=list(from=1:6,to=1:6))
# If exacttimes is set to TRUE, then the observation times are assumed to represent the exact times of transition of the process. 
# numerical overflow in calculating likelihood --> this was corrected with "control argument" #https://github.com/chjackson/msm/issues/5

#If using gen.inits=TRUE then the non-zero entries in the qmatrix can be anything you like (conventionally 1).
#obstype= A vector specifying the observation scheme for each row of the data
msm_model_cens<- msm(state ~ years_in_study_corr, subject = PTNUM, data = d_match_surv_msm, qmatrix = msm_mat_cens_allowed, gen.inits=T,control=list(fnscale=4000,reltol = 1e-16),censor = c(7,8),censor.states = list(c(3,4),c(5,6)),obstype=1)
#si pongo obstype=2, Error in if (any(q2 < 0)) stop("off-diagonal entries of qmatrix should not be negative") : 


print(msm_model_cens)
## 
## Call:
## msm(formula = state ~ years_in_study_corr, subject = PTNUM, data = d_match_surv_msm,     qmatrix = msm_mat_cens_allowed, gen.inits = T, obstype = 1,     censor = c(7, 8), censor.states = list(c(3, 4), c(5, 6)),     control = list(fnscale = 4000, reltol = 1e-16))
## 
## Maximum likelihood estimates
## 
## Transition intensities
##       Baseline                          
## 1 - 1 -1.016e+05 (-1.344e+05,-7.682e+04)
## 1 - 2  4.153e+00 ( 3.918e+00, 4.402e+00)
## 1 - 3  1.016e+05 ( 7.682e+04, 1.344e+05)
## 2 - 1  1.682e+00 ( 1.540e+00, 1.837e+00)
## 2 - 2 -2.127e+00 (-2.283e+00,-1.982e+00)
## 2 - 4  4.450e-01 ( 4.092e-01, 4.840e-01)
## 3 - 1  3.405e+04 ( 2.568e+04, 4.514e+04)
## 3 - 3 -3.405e+04 (-4.514e+04,-2.568e+04)
## 3 - 4  2.354e-01 ( 2.185e-01, 2.535e-01)
## 4 - 4 -3.757e+01 (-4.171e+01,-3.385e+01)
## 4 - 5  1.628e+00 ( 1.541e+00, 1.720e+00)
## 4 - 6  3.594e+01 ( 3.224e+01, 4.007e+01)
## 5 - 1  1.483e-01 ( 1.212e-01, 1.814e-01)
## 5 - 4  1.103e+00 ( 1.006e+00, 1.210e+00)
## 5 - 5 -1.251e+00 (-1.356e+00,-1.154e+00)
## 6 - 1  1.367e-01 ( 1.257e-01, 1.488e-01)
## 6 - 4  1.334e+01 ( 1.189e+01, 1.496e+01)
## 6 - 6 -1.347e+01 (-1.509e+01,-1.203e+01)
## 
## -2 * log-likelihood:  105317.7 
## [Note, to obtain old print format, use "printold.msm"]
summary(msm_model_cens)
## $prevalences
## $prevalences$Observed
##                  State 1 State 2 State 3 State 4 State 5 State 6 Total
## 0                  11458       0       0   11458       0       0 22916
## 1.13456536618754    1739     608    1382    2190     218    1374  7511
## 2.26913073237509     441     526     953     769     261    1065  4015
## 3.40369609856263     238     370     587     474     198     747  2614
## 4.53826146475017     138     239     381     328     146     527  1759
## 5.67282683093771     103     141     224     230      94     310  1102
## 6.80739219712526      70      81     115     145      55     190   656
## 7.9419575633128       36      36      58      89      31      78   328
## 9.07652292950034      25      10      16      46      10      24   131
## 10.2110882956879       1       1       1       9       1       1    14
## 11.3456536618754       0       0       0       0       0       1     1
## 
## $prevalences$Expected
##                             1            2            3            4
## 0                1.145800e+04    0.0000000    0.0000000 1.145800e+04
## 1.13456536618754 5.484447e+02 1084.4749303 1636.9032564 9.061562e+02
## 2.26913073237509 2.569283e+02  525.3736411  766.8335734 5.070738e+02
## 3.40369609856263 1.541759e+02  311.2396801  460.1561786 3.435847e+02
## 4.53826146475017 9.815273e+01  196.0386914  292.9482527 2.373937e+02
## 5.67282683093771 5.925064e+01  117.4465208  176.8403628 1.512454e+02
## 6.80739219712526 3.441720e+01   67.8686490  102.7220662 9.099695e+01
## 7.9419575633128  1.693552e+01   33.2801060   50.5459985 4.580696e+01
## 9.07652292950034 6.694101e+00   13.1245714   19.9793039 1.837370e+01
## 10.2110882956879 7.106284e-01    1.3911945    2.1209512 1.968993e+00
## 11.3456536618754 5.054110e-02    0.0988486    0.1508456 1.408888e-01
##                            5            6 Total
## 0                  0.0000000    0.0000000 22916
## 1.13456536618754 921.8132495 2413.2077006  7511
## 2.26913073237509 610.3366420 1348.4539734  4015
## 3.40369609856263 430.3217575  914.5217958  2614
## 4.53826146475017 302.0978073  632.3687752  1759
## 5.67282683093771 194.1219062  403.0951826  1102
## 6.80739219712526 117.3931985  242.6019380   656
## 7.9419575633128   59.2828136  122.1485998   328
## 9.07652292950034  23.8267070   49.0016167   131
## 10.2110882956879   2.5566028    5.2516297    14
## 11.3456536618754   0.1830823    0.3757935     1
## 
## $prevalences$`Observed percentages`
##                    State 1   State 2   State 3  State 4  State 5    State 6
## 0                50.000000  0.000000  0.000000 50.00000 0.000000   0.000000
## 1.13456536618754 23.152709  8.094794 18.399680 29.15724 2.902410  18.293170
## 2.26913073237509 10.983811 13.100872 23.735990 19.15318 6.500623  26.525529
## 3.40369609856263  9.104820 14.154552 22.456006 18.13313 7.574598  28.576894
## 4.53826146475017  7.845367 13.587265 21.660034 18.64696 8.300171  29.960205
## 5.67282683093771  9.346642 12.794918 20.326679 20.87114 8.529946  28.130672
## 6.80739219712526 10.670732 12.347561 17.530488 22.10366 8.384146  28.963415
## 7.9419575633128  10.975610 10.975610 17.682927 27.13415 9.451220  23.780488
## 9.07652292950034 19.083969  7.633588 12.213740 35.11450 7.633588  18.320611
## 10.2110882956879  7.142857  7.142857  7.142857 64.28571 7.142857   7.142857
## 11.3456536618754  0.000000  0.000000  0.000000  0.00000 0.000000 100.000000
## 
## $prevalences$`Expected percentages`
##                          1         2        3        4        5        6
## 0                50.000000  0.000000  0.00000 50.00000  0.00000  0.00000
## 1.13456536618754  7.301886 14.438489 21.79341 12.06439 12.27284 32.12898
## 2.26913073237509  6.399211 13.085271 19.09922 12.62949 15.20141 33.58540
## 3.40369609856263  5.898083 11.906644 17.60353 13.14402 16.46219 34.98553
## 4.53826146475017  5.580030 11.144894 16.65425 13.49595 17.17441 35.95047
## 5.67282683093771  5.376646 10.657579 16.04722 13.72463 17.61542 36.57851
## 6.80739219712526  5.246524 10.345831 15.65885 13.87149 17.89530 36.98200
## 7.9419575633128   5.163269 10.146374 15.41037 13.96554 18.07403 37.24043
## 9.07652292950034  5.110000 10.018757 15.25138 14.02573 18.18833 37.40581
## 10.2110882956879  5.075917  9.937104 15.14965 14.06424 18.26145 37.51164
## 11.3456536618754  5.054110  9.884860 15.08456 14.08888 18.30823 37.57935
## 
## 
## $hazard
## NULL
## 
## $hazard.scale
## NULL
## 
## attr(,"class")
## [1] "summary.msm"
# If censor is a vector with more than one element, this should be a list, with each element a vector corresponding to the equivalent element of censor.
#1: In msm.form.cmodel(censor, censor.states, qmatrix) : some censoring indicators are the same as actual states
#2: In msm.form.cmodel(censor, censor.states, qmatrix) : some censoring indicators are the same as actual states


#CƓMO SETTEAR LA BASE DE DATOS PARA PONER COMO CENSURA EL ONGOING TREATMENT, Y LA COMPLETACIƓN DEL TRATAMIENTO COMO ALGO IMPORTANTE.


Since users with more than 2 readmissions covered around 3% of the matched sample at baseline, we decided to restrict our analysis to the possibility to experience a second readmission, as shown in the figure below.


Figure 19. Markov Influence Diagram of possible pathways of a user (up to 3 treatments)

Figure 19. Markov Influence Diagram of possible pathways of a user (up to 3 treatments)


d_match_surv_msm %>% 
    dplyr::filter(event_num<=6) %>% 
    dplyr::mutate(state_up3=dplyr::case_when(
      events_within_tr==1 & tipo_de_plan_res==1 & dup==1~1,
      tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==1 & dup==1~2,
      tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==1 & dup==1~3,
      events_within_tr==1 & tipo_de_plan_res==0 & dup==1~4,
      tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==0 & dup==1~5,
      tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==0 & dup==1~6,
      grepl("Ongoing",tr_completion) & tipo_de_plan_res==1 & events_within_tr==2 & dup==1~91,
      grepl("Ongoing",tr_completion) & tipo_de_plan_res==0 & events_within_tr==2 & dup==1~92,
      
      events_within_tr==1 & tipo_de_plan_res==1 & dup==2~7,
      tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==1 & dup==2~8,
      tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==1 & dup==2~9,
      events_within_tr==1 & tipo_de_plan_res==0 & dup==2~10,
      tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==0 & dup==2~11,
      tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==0 & dup==2~12,
      grepl("Ongoing",tr_completion) & tipo_de_plan_res==1 & events_within_tr==2 & dup==2~93,
      grepl("Ongoing",tr_completion) & tipo_de_plan_res==0 & events_within_tr==2 & dup==2~94,
      
      events_within_tr==1 & tipo_de_plan_res==1 & dup==3~13,
      tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==1 & dup==3~14,
      tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==1 & dup==3~15,
      events_within_tr==1 & tipo_de_plan_res==0 & dup==3~16,
      tr_completion=="Completion" & events_within_tr==2 & tipo_de_plan_res==0 & dup==3~17,
      tr_completion=="Non-completion" & events_within_tr==2 & tipo_de_plan_res==0 & dup==3~18,
      grepl("Ongoing",tr_completion) & tipo_de_plan_res==1 & events_within_tr==2 & dup==3~95,
      grepl("Ongoing",tr_completion) & tipo_de_plan_res==0 & events_within_tr==2 & dup==3~96)) %>%
  assign("d_match_surv_msm_up_3",.,envir=.GlobalEnv)

d_match_surv_msm_up_3 %>% 
  dplyr::select(PTNUM,event_num,state_up3,years_in_study_corr,min_achievement) %>% 
  dplyr::mutate_at(vars(c("state_up3","min_achievement","years_in_study_corr")),~as.character(.)) %>% 
    tidyr::pivot_longer(
     cols = -c("PTNUM","event_num"),
     names_to = "varswide",
     values_drop_na = F
    ) %>% 
    tidyr::pivot_wider(
                       names_from=c(varswide,event_num),
                       names_glue ="{varswide}_{event_num}",
                       values_from=value
                       ) %>% 
  dplyr::mutate_at(vars(starts_with("state_")),~as.numeric(.)) %>% 
  dplyr::mutate_at(vars(starts_with("years_in_study_corr_")),~as.numeric(.)) %>% 
assign("d_match_surv_msm_up_3_wide",.,envir=.GlobalEnv)

rio::export(d_match_surv_msm_up_3_wide,"G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/d_match_surv_msm_up_3_wide.dta")
## Error in write_dta_(data, normalizePath(path, mustWork = FALSE), version = stata_file_format(version), : Failed to open 'G:\Mi unidad\Alvacast\SISTRAT 2019 (github)\d_match_surv_msm_up_3_wide.dta' for writing
msm_mat_cens_allowed_up_3 <- matrix(c(
                            0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,
                            0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,
                            0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,1,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,
                            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
                            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
                          ), nrow=18, ncol=18,
                          byrow=TRUE,
                          dimnames=list(from=1:18,to=1:18))


msm_model_cens_up3<- msm(state_up3 ~ years_in_study_corr, subject = PTNUM, data = d_match_surv_msm_up_3, qmatrix = msm_mat_cens_allowed_up_3, gen.inits=T,control=list(fnscale=4000,reltol = 1e-16),censor = c(91,92,93,94,95,96),censor.states = list(c(2,3),c(5,6),c(8,9),c(11,12),c(14,15),c(17,18)),obstype=1)

print(msm_model_cens_up3)
## 
## Call:
## msm(formula = state_up3 ~ years_in_study_corr, subject = PTNUM,     data = d_match_surv_msm_up_3, qmatrix = msm_mat_cens_allowed_up_3,     gen.inits = T, obstype = 1, censor = c(91, 92, 93, 94, 95,         96), censor.states = list(c(2, 3), c(5, 6), c(8, 9),         c(11, 12), c(14, 15), c(17, 18)), control = list(fnscale = 4000,         reltol = 1e-16))
## 
## Optimisation probably not converged to the maximum likelihood.
## optim() reported convergence but estimated Hessian not positive-definite.
## 
## -2 * log-likelihood:  156831.5 
## [Note, to obtain old print format, use "printold.msm"]
summary(msm_model_cens_up3)
## $prevalences
## $prevalences$Observed
##                  State 1 State 2 State 3 State 4 State 5 State 6 State 7
## 0                  11458       0       0   11458       0       0       0
## 1.13456536618754    1284     582    1230    1420     207    1195     421
## 2.26913073237509      85     439     714     128     207     736     257
## 3.40369609856263       5     268     394       2     134     426     133
## 4.53826146475017       1     152     228       0      82     242      72
## 5.67282683093771       0      79     122       0      47     125      45
## 6.80739219712526       0      37      55       0      27      62      27
## 7.9419575633128        0      14      23       0       9      17      16
## 9.07652292950034       0       4       4       0       4       7       4
## 10.2110882956879       0       1       0       0       0       1       0
## 11.3456536618754       0       0       0       0       0       0       0
##                  State 8 State 9 State 10 State 11 State 12 State 13 State 14
## 0                      0       0        0        0        0        0        0
## 1.13456536618754      25     141      711       11      160       32        5
## 2.26913073237509      80     190      502       53      272       84       32
## 3.40369609856263      69     139      297       51      230       76       83
## 4.53826146475017      48      96      159       46      181       44      127
## 5.67282683093771      32      59       99       31      118       25      158
## 6.80739219712526      20      31       59       16       74       22      179
## 7.9419575633128        9      20       38       13       32        4      196
## 9.07652292950034       4       7       18        4        8       11      201
## 10.2110882956879       0       1        5        0        0        0      208
## 11.3456536618754       0       0        0        0        1        0      208
##                  State 15 State 16 State 17 State 18 Total
## 0                       0        0        0        0 22916
## 1.13456536618754       29       55        5       36  7549
## 2.26913073237509      163      121       25      220  4308
## 3.40369609856263      318      138       73      456  3292
## 4.53826146475017      430      107      110      652  2777
## 5.67282683093771      496       75      144      795  2450
## 6.80739219712526      541       45      162      889  2246
## 7.9419575633128       577       24      174      953  2119
## 9.07652292950034      582       14      186      994  2052
## 10.2110882956879      593        1      187     1021  2018
## 11.3456536618754      594        0      187     1022  2012
## 
## $prevalences$Expected
##                             1           2            3            4           5
## 0                1.145800e+04   0.0000000    0.0000000 1.145800e+04   0.0000000
## 1.13456536618754 4.847742e+02 637.9912199 1351.9261621 5.227200e+02 445.0659488
## 2.26913073237509 3.553087e+01 223.5085361  435.6637224 4.131094e+01 174.9504233
## 3.40369609856263 3.487146e+00  87.5039556  154.9626386 4.371786e+00  77.2960544
## 4.53826146475017 3.778037e-01  36.3313413   58.0814743 5.107220e-01  36.3185349
## 5.67282683093771 4.280914e-02  15.6168826   22.4742418 6.239998e-02  17.6867783
## 6.80739219712526 5.040345e-03   6.9567347    9.0022573 7.922054e-03   8.9298511
## 7.9419575633128  6.107473e-04   3.1870532    3.7069461 1.035067e-03   4.6373686
## 9.07652292950034 7.596054e-05   1.4983656    1.5662658 1.388113e-04   2.4715104
## 10.2110882956879 9.594270e-06   0.7153563    0.6720021 1.890507e-05   1.3376273
## 11.3456536618754 1.228567e-06   0.3462457    0.2922985 2.610326e-06   0.7339507
##                            6           7           8          9          10
## 0                   0.000000   0.0000000   0.0000000   0.000000   0.0000000
## 1.13456536618754 1686.004361 388.1214051 116.7466008 272.657599 737.5611019
## 2.26913073237509  616.512035 189.5110730 131.8448184 302.341492 397.9281886
## 3.40369609856263  250.729357  80.6242216  92.0104554 206.860159 180.8508281
## 4.53826146475017  107.881034  33.9354275  53.6860439 118.286397  79.5356658
## 5.67282683093771   48.006616  14.5180564  28.7194986  62.043388  35.1305851
## 6.80739219712526   22.130320   6.4247581  14.8564041  31.502829  15.9624466
## 7.9419575633128    10.490310   2.9338934   7.5765403  15.792695   7.4663767
## 9.07652292950034    5.102864   1.3817048   3.8596263   7.921405   3.5975723
## 10.2110882956879    2.520632   0.6643108   1.9615146   3.970869   1.7684128
## 11.3456536618754    1.262294   0.3255432   0.9988273   1.998013   0.8854669
##                          11         12         13        14        15
## 0                  0.000000   0.000000   0.000000   0.00000   0.00000
## 1.13456536618754 125.531587 479.983963  68.528261  13.79444  35.48089
## 2.26913073237509 163.585109 594.474962 129.556001  72.76430 187.15818
## 3.40369609856263 129.924559 445.965339 114.489396 138.60146 356.49894
## 4.53826146475017  85.420151 275.859529  76.232793 182.17303 468.57005
## 5.67282683093771  51.114521 155.018203  44.335851 202.03591 519.65967
## 6.80739219712526  29.407297  83.731560  24.324565 208.98146 537.52442
## 7.9419575633128   16.597608  44.407019  12.982297 210.21432 540.69549
## 9.07652292950034   9.314708  23.456402   6.868692 210.59572 541.67650
## 10.2110882956879   5.191962  12.332646   3.608948 210.84185 542.30957
## 11.3456536618754   2.886753   6.484519   1.894340 212.19077 545.77913
##                          16        17         18 Total
## 0                  0.000000   0.00000    0.00000 22916
## 1.13456536618754 117.295282  11.30669   53.51028  7549
## 2.26913073237509 243.743974  64.12694  303.48844  4308
## 3.40369609856263 231.248362 128.48842  608.08692  3292
## 4.53826146475017 162.283582 174.70482  826.81160  2777
## 5.67282683093771  98.151760 198.05652  937.32631  2450
## 6.80739219712526  55.489250 207.71703  983.04585  2246
## 7.9419575633128   30.329221 210.72060  997.26061  2119
## 9.07652292950034  16.365391 212.17576 1004.14730  2052
## 10.2110882956879   8.744225 213.05441 1008.30564  2018
## 11.3456536618754   4.657691 214.78209 1016.48207  2012
## 
## $prevalences$`Observed percentages`
##                      State 1     State 2    State 3     State 4   State 5
## 0                50.00000000  0.00000000  0.0000000 50.00000000 0.0000000
## 1.13456536618754 17.00887535  7.70963041 16.2935488 18.81043847 2.7420850
## 2.26913073237509  1.97307335 10.19034355 16.5738162  2.97121634 4.8050139
## 3.40369609856263  0.15188335  8.14094775 11.9684083  0.06075334 4.0704739
## 4.53826146475017  0.03601008  5.47353259  8.2102989  0.00000000 2.9528268
## 5.67282683093771  0.00000000  3.22448980  4.9795918  0.00000000 1.9183673
## 6.80739219712526  0.00000000  1.64737311  2.4487979  0.00000000 1.2021371
## 7.9419575633128   0.00000000  0.66068900  1.0854176  0.00000000 0.4247286
## 9.07652292950034  0.00000000  0.19493177  0.1949318  0.00000000 0.1949318
## 10.2110882956879  0.00000000  0.04955401  0.0000000  0.00000000 0.0000000
## 11.3456536618754  0.00000000  0.00000000  0.0000000  0.00000000 0.0000000
##                      State 6   State 7   State 8    State 9   State 10
## 0                 0.00000000 0.0000000 0.0000000 0.00000000  0.0000000
## 1.13456536618754 15.82991125 5.5768976 0.3311697 1.86779706  9.4184660
## 2.26913073237509 17.08449396 5.9656453 1.8570102 4.41039926 11.6527391
## 3.40369609856263 12.94046173 4.0400972 2.0959903 4.22235723  9.0218712
## 4.53826146475017  8.71444004 2.5927260 1.7284840 3.45696795  5.7256032
## 5.67282683093771  5.10204082 1.8367347 1.3061224 2.40816327  4.0408163
## 6.80739219712526  2.76046305 1.2021371 0.8904720 1.38023152  2.6268923
## 7.9419575633128   0.80226522 0.7550731 0.4247286 0.94384143  1.7932987
## 9.07652292950034  0.34113060 0.1949318 0.1949318 0.34113060  0.8771930
## 10.2110882956879  0.04955401 0.0000000 0.0000000 0.04955401  0.2477701
## 11.3456536618754  0.00000000 0.0000000 0.0000000 0.00000000  0.0000000
##                   State 11   State 12  State 13    State 14   State 15
## 0                0.0000000 0.00000000 0.0000000  0.00000000  0.0000000
## 1.13456536618754 0.1457147 2.11948602 0.4238972  0.06623394  0.3841568
## 2.26913073237509 1.2302693 6.31383473 1.9498607  0.74280409  3.7836583
## 3.40369609856263 1.5492102 6.98663426 2.3086270  2.52126367  9.6597813
## 4.53826146475017 1.6564638 6.51782499 1.5844436  4.57328052 15.4843356
## 5.67282683093771 1.2653061 4.81632653 1.0204082  6.44897959 20.2448980
## 6.80739219712526 0.7123776 3.29474622 0.9795191  7.96972395 24.0872663
## 7.9419575633128  0.6134969 1.51014630 0.1887683  9.24964606 27.2298254
## 9.07652292950034 0.1949318 0.38986355 0.5360624  9.79532164 28.3625731
## 10.2110882956879 0.0000000 0.00000000 0.0000000 10.30723489 29.3855302
## 11.3456536618754 0.0000000 0.04970179 0.0000000 10.33797217 29.5228628
##                    State 16   State 17   State 18
## 0                0.00000000 0.00000000  0.0000000
## 1.13456536618754 0.72857332 0.06623394  0.4768844
## 2.26913073237509 2.80872795 0.58031569  5.1067781
## 3.40369609856263 4.19198056 2.21749696 13.8517618
## 4.53826146475017 3.85307886 3.96110911 23.4785740
## 5.67282683093771 3.06122449 5.87755102 32.4489796
## 6.80739219712526 2.00356189 7.21282280 39.5814782
## 7.9419575633128  1.13260972 8.21142048 44.9740444
## 9.07652292950034 0.68226121 9.06432749 48.4405458
## 10.2110882956879 0.04955401 9.26660059 50.5946482
## 11.3456536618754 0.00000000 9.29423459 50.7952286
## 
## $prevalences$`Expected percentages`
##                             1          2           3            4          5
## 0                5.000000e+01 0.00000000  0.00000000 5.000000e+01 0.00000000
## 1.13456536618754 6.421701e+00 8.45133422 17.90867879 6.924361e+00 5.89569412
## 2.26913073237509 8.247649e-01 5.18822043 10.11289978 9.589354e-01 4.06105904
## 3.40369609856263 1.059279e-01 2.65807885  4.70724905 1.328003e-01 2.34799679
## 4.53826146475017 1.360474e-02 1.30829461  2.09151870 1.839114e-02 1.30783345
## 5.67282683093771 1.747312e-03 0.63742378  0.91731599 2.546938e-03 0.72190932
## 6.80739219712526 2.244143e-04 0.30973885  0.40081288 3.527183e-04 0.39758910
## 7.9419575633128  2.882243e-05 0.15040364  0.17493847 4.884698e-05 0.21884703
## 9.07652292950034 3.701781e-06 0.07301977  0.07632874 6.764682e-06 0.12044397
## 10.2110882956879 4.754346e-07 0.03544878  0.03330040 9.368219e-07 0.06628480
## 11.3456536618754 6.106198e-08 0.01720903  0.01452776 1.297379e-07 0.03647866
##                            6          7          8          9         10
## 0                 0.00000000 0.00000000 0.00000000 0.00000000 0.00000000
## 1.13456536618754 22.33414175 5.14136184 1.54651743 3.61183731 9.77031530
## 2.26913073237509 14.31086432 4.39904998 3.06046468 7.01814048 9.23695888
## 3.40369609856263  7.61632311 2.44909543 2.79497131 6.28372295 5.49364605
## 4.53826146475017  3.88480497 1.22201755 1.93323889 4.25950297 2.86408591
## 5.67282683093771  1.95945373 0.59257373 1.17222443 2.53238317 1.43390143
## 6.80739219712526  0.98532144 0.28605334 0.66146056 1.40261930 0.71070555
## 7.9419575633128   0.49505945 0.13845651 0.35755263 0.74528997 0.35235378
## 9.07652292950034  0.24867759 0.06733454 0.18809095 0.38603340 0.17532029
## 10.2110882956879  0.12490745 0.03291927 0.09720092 0.19677252 0.08763195
## 11.3456536618754  0.06273825 0.01618008 0.04964350 0.09930483 0.04400929
##                         11         12         13        14         15        16
## 0                0.0000000  0.0000000 0.00000000  0.000000  0.0000000 0.0000000
## 1.13456536618754 1.6628903  6.3582456 0.90777933  0.182732  0.4700078 1.5537857
## 2.26913073237509 3.7972402 13.7993259 3.00733521  1.689051  4.3444331 5.6579381
## 3.40369609856263 3.9466755 13.5469423 3.47780670  4.210251 10.8292509 7.0245554
## 4.53826146475017 3.0759867  9.9337245 2.74514919  6.560066 16.8732462 5.8438452
## 5.67282683093771 2.0863070  6.3272736 1.80962659  8.246364 21.2105989 4.0061943
## 6.80739219712526 1.3093186  3.7280303 1.08301716  9.304606 23.9325210 2.4705810
## 7.9419575633128  0.7832755  2.0956592 0.61266150  9.920449 25.5165403 1.4312988
## 9.07652292950034 0.4539332  1.1430995 0.33473158 10.262949 26.3974900 0.7975337
## 10.2110882956879 0.2572825  0.6111321 0.17883787 10.448060 26.8736157 0.4333114
## 11.3456536618754 0.1434768  0.3222922 0.09415208 10.546261 27.1261995 0.2314956
##                          17         18
## 0                 0.0000000  0.0000000
## 1.13456536618754  0.1497774  0.7088394
## 2.26913073237509  1.4885547  7.0447642
## 3.40369609856263  3.9030505 18.4716562
## 4.53826146475017  6.2911352 29.7735543
## 5.67282683093771  8.0839396 38.2582166
## 6.80739219712526  9.2483096 43.7687378
## 7.9419575633128   9.9443419 47.0627943
## 9.07652292950034 10.3399492 48.9350537
## 10.2110882956879 10.5577013 49.9655916
## 11.3456536618754 10.6750540 50.5209775
## 
## 
## $hazard
## NULL
## 
## $hazard.scale
## NULL
## 
## attr(,"class")
## [1] "summary.msm"
invisible(c("Me apareció el siguiente mensaje"))
#Optimisation has probably not converged to the maximum likelihood - Hessian is not positive definite.
#Optimisation probably not converged to the maximum likelihood.
#optim() reported convergence but estimated Hessian not positive-definite.
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#MSTATEs
#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:#:
#See data structure:
#View(ebmt4)

invisible(c("tengo 2 problemas con esta aplicación: si veo ebmt, tiene formateado los casos en que cada columns es un tiempo a un determinado estado. Si llego a ese estado, entonces es un tiempo menor al person time y viene con un 0"))
 #file:///G:/Mi%20unidad/Alvacast/Srv%20Analysis/(Use%20R!)%20Frans%20Willekens%20-%20Multistate%20Analysis%20of%20Life%20Histories%20with%20R-Springer%20(2014).pdf
#In other words, the package requires one record for each potential transition. 

library("mstate")
#https://rdrr.io/cran/mstate/f/inst/doc/Tutorial.pdf 

#The first step in a multi-state model analysis is to set up the transition matrix. The transition
#matrix specifies which direct transitions are possible (those with NA are impossible) and assigns
#numbers to the transitions for future reference.
mstate_mat_cens_allowed <- matrix(c(
                          NA,1,1,NA,NA,NA,
                          1,NA,NA,1,NA,NA,
                          1,NA,NA,1,NA,NA,
                          NA,NA,NA,NA,1,1,
                          1,NA,NA,1,NA,NA,
                          1,NA,NA,1,NA,NA
                          ), nrow=6, ncol=6,
                          byrow=TRUE,
                          dimnames=list(from=1:6,to=1:6))
library("mstate")
tmat <- mstate::transMat(x = list(c(2, 3), 
                         c(1,4), 
                         c(1,4), 
                         c(5,6),
                         c(1,4),
                         c(1,4)),
                       names = c("Enter R", "Comp R", "N-Comp R", "Enter O", 
                                 "Comp O", "N-Comp O"))
print(tmat)
#1~"Enter to a Residential Treatment(1)",
#from==2~"Complete a Residential Treatment(2)",
#from==3~"Not Complete a Residential Treatment(3)",
#from==4~"Enter to an Outpatient Treatment(4)",
#from==5~"Complete an Outpatient Treatment(5)",
#from==6~"Not Complete an Outpatient Treatment(6)",
#from==7~"Ongoing treatment (Residential)(Censored)(7)",
#from==8~"Ongoing treatment (Outpatient)(Censored)(8)"

#https://www.rdocumentation.org/packages/msm/versions/1.6.8/topics/msm2Surv
msm2Surv(d_match_surv_msm[c("PTNUM","years_in_study_corr","state","min_achievement")],"PTNUM", "years_in_study_corr", "state", "min_achievement", data.frame(msm_mat_cens_allowed))

Session Info

path<-rstudioapi::getSourceEditorContext()$path

Sys.getenv("R_LIBS_USER")
## [1] "C:/Users/andre/Documents/R/win-library/4.0"
rstudioapi::getSourceEditorContext()
## Document Context: 
## - id:        '96F37798'
## - path:      'E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/SUD_CL/Proyecto_carla32.Rmd'
## - contents:  <1570 rows>
## Document Selection:
## - [1332, 1] -- [1332, 1]: ''
#save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")

if (grepl("CISS Fondecyt",path)==T){
    save.image("C:/Users/CISS Fondecyt/OneDrive/Escritorio/SUD_CL/mult_state.RData")
  } else if (grepl("andre",path)==T){
    save.image("C:/Users/andre/Desktop/SUD_CL/mult_state_carla_2.RData")
  } else if (grepl("E:",path)==T){
    save.image("E:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")
  } else {
    save.image("G:/Mi unidad/Alvacast/SISTRAT 2019 (github)/mult_state.RData")
  }

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19042)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=Spanish_Chile.1252  LC_CTYPE=Spanish_Chile.1252   
## [3] LC_MONETARY=Spanish_Chile.1252 LC_NUMERIC=C                  
## [5] LC_TIME=Spanish_Chile.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] msm_1.6.8               mstate_0.3.1            lubridate_1.7.9.2      
##  [4] Amelia_1.7.6            Rcpp_1.0.6              polycor_0.7-10         
##  [7] compareGroups_4.4.6     DiagrammeR_1.0.6.1      gurobi_9.1-1           
## [10] radiant.update_1.4.1    eha_2.8.5               cobalt_4.2.4           
## [13] sensitivityfull_1.5.6   sensitivity2x2xk_1.01   MatchIt_4.1.0          
## [16] tableone_0.12.0         stargazer_5.2.2         reshape2_1.4.4         
## [19] exactRankTests_0.8-31   gridExtra_2.3           foreign_0.8-81         
## [22] glpkAPI_1.3.2           designmatch_0.3.1       Rglpk_0.6-4            
## [25] slam_0.1-48             MASS_7.3-53.1           survMisc_0.5.5         
## [28] ggfortify_0.4.11        rateratio.test_1.0-2    survminer_0.4.8        
## [31] ggpubr_0.4.0            epiR_2.0.19             forcats_0.5.1          
## [34] purrr_0.3.4             readr_1.4.0             tibble_3.0.6           
## [37] tidyverse_1.3.0         treemapify_2.5.5        ggiraph_0.7.8          
## [40] chilemapas_0.2          sf_0.9-7                finalfit_1.0.2         
## [43] lsmeans_2.30-0          emmeans_1.5.4           choroplethrAdmin1_1.1.1
## [46] choroplethrMaps_1.0.1   choroplethr_3.7.0       acs_2.1.4              
## [49] XML_3.99-0.5            RColorBrewer_1.1-2      panelr_0.7.5           
## [52] lme4_1.1-26             Matrix_1.3-2            dplyr_1.0.4            
## [55] data.table_1.13.6       codebook_0.9.2          devtools_2.3.2         
## [58] usethis_2.0.0           sqldf_0.4-11            RSQLite_2.2.3          
## [61] gsubfn_0.7              proto_1.0.0             broom_0.7.4            
## [64] zoo_1.8-8               altair_4.1.1            rbokeh_0.5.1           
## [67] janitor_2.1.0           plotly_4.9.3            kableExtra_1.3.1       
## [70] Hmisc_4.4-2             Formula_1.2-4           survival_3.2-10        
## [73] lattice_0.20-41         ggplot2_3.3.3           stringr_1.4.0          
## [76] stringi_1.5.3           tidyr_1.1.2             knitr_1.31             
## [79] matrixStats_0.58.0      boot_1.3-27            
## 
## loaded via a namespace (and not attached):
##   [1] class_7.3-18        ps_1.5.0            rprojroot_2.0.2    
##   [4] crayon_1.4.1        V8_3.4.0            nlme_3.1-152       
##   [7] backports_1.2.1     reprex_1.0.0        ggcorrplot_0.1.3   
##  [10] rlang_0.4.10        readxl_1.3.1        performance_0.7.0  
##  [13] nloptr_1.2.2.2      callr_3.5.1         flextable_0.6.3    
##  [16] rjson_0.2.20        ggmap_3.0.0         bit64_4.0.5        
##  [19] glue_1.4.2          sjPlot_2.8.7        parallel_4.0.5     
##  [22] processx_3.4.5      classInt_0.4-3      tcltk_4.0.5        
##  [25] haven_2.3.1         tidyselect_1.1.0    km.ci_0.5-2        
##  [28] rio_0.5.16          sjmisc_2.8.6        chron_2.3-56       
##  [31] xtable_1.8-4        magrittr_2.0.1      evaluate_0.14      
##  [34] gdtools_0.2.3       RgoogleMaps_1.4.5.3 cli_2.3.1          
##  [37] rstudioapi_0.13     sp_1.4-5            rpart_4.1-15       
##  [40] jtools_2.1.2        sjlabelled_1.1.7    RJSONIO_1.3-1.4    
##  [43] maps_3.3.0          gistr_0.9.0         xfun_0.22          
##  [46] parameters_0.11.0   pkgbuild_1.2.0      cluster_2.1.1      
##  [49] ggfittext_0.9.1     expm_0.999-6        png_0.1-7          
##  [52] withr_2.4.1         bitops_1.0-6        tidycensus_0.11.4  
##  [55] plyr_1.8.6          cellranger_1.1.0    e1071_1.7-4        
##  [58] survey_4.0          coda_0.19-4         pillar_1.4.7       
##  [61] cachem_1.0.3        fs_1.5.0            vctrs_0.3.6        
##  [64] ellipsis_0.3.1      generics_0.1.0      rgdal_1.5-23       
##  [67] tools_4.0.5         munsell_0.5.0       fastmap_1.1.0      
##  [70] compiler_4.0.5      pkgload_1.1.0       abind_1.4-5        
##  [73] tigris_1.0          sessioninfo_1.1.1   visNetwork_2.0.9   
##  [76] jsonlite_1.7.2      WDI_2.7.2           scales_1.1.1       
##  [79] carData_3.0-4       estimability_1.3    lazyeval_0.2.2     
##  [82] car_3.0-10          latticeExtra_0.6-29 reticulate_1.18    
##  [85] effectsize_0.4.3    checkmate_2.0.0     rmarkdown_2.6      
##  [88] openxlsx_4.2.3      statmod_1.4.35      webshot_0.5.2      
##  [91] pander_0.6.3        yaml_2.2.1          systemfonts_1.0.0  
##  [94] htmltools_0.5.1.1   memoise_2.0.0       viridisLite_0.3.0  
##  [97] jsonvalidate_1.1.0  digest_0.6.27       assertthat_0.2.1   
## [100] rappdirs_0.3.3      repr_1.1.3          bayestestR_0.8.2   
## [103] BiasedUrn_1.07      KMsurv_0.1-5        units_0.7-0        
## [106] remotes_2.2.0       blob_1.2.1          splines_4.0.5      
## [109] labeling_0.4.2      hms_1.0.0           rmapshaper_0.4.4   
## [112] modelr_0.1.8        colorspace_2.0-0    base64enc_0.1-3    
## [115] nnet_7.3-15         mvtnorm_1.1-1       truncnorm_1.0-8    
## [118] R6_2.5.0            grid_4.0.5          crul_1.0.0         
## [121] lifecycle_1.0.0     labelled_2.7.0      zip_2.1.1          
## [124] writexl_1.3.1       curl_4.3            geojsonlint_0.4.0  
## [127] ggsignif_0.6.0      pryr_0.1.4          minqa_1.2.4        
## [130] testthat_3.0.1      snakecase_0.11.0    desc_1.2.0         
## [133] htmlwidgets_1.5.3   officer_0.3.16      crosstalk_1.1.1    
## [136] rvest_0.3.6         insight_0.12.0      htmlTable_2.1.0    
## [139] codetools_0.2-18    prettyunits_1.1.1   dbplyr_2.1.0       
## [142] vegawidget_0.3.2    gtable_0.3.0        DBI_1.1.1          
## [145] httr_1.4.2          highr_0.8           KernSmooth_2.23-18 
## [148] farver_2.0.3        uuid_0.1-4          hexbin_1.28.2      
## [151] mice_3.13.0         xml2_1.3.2          ggeffects_1.0.1    
## [154] bit_4.0.4           sjstats_0.18.1      jpeg_0.1-8.1       
## [157] pkgconfig_2.0.3     maptools_1.0-2      rstatix_0.6.0      
## [160] mitools_2.4         HardyWeinberg_1.7.1 Rsolnp_1.16        
## [163] httpcode_0.3.0